14 import xml
.etree
.ElementTree
 
  17 from .common 
import InfoExtractor
, SearchInfoExtractor
 
  18 from .subtitles 
import SubtitlesInfoExtractor
 
  25     compat_urllib_request
, 
  39 class YoutubeBaseInfoExtractor(InfoExtractor
): 
  40     """Provide base functions for Youtube extractors""" 
  41     _LOGIN_URL 
= 'https://accounts.google.com/ServiceLogin' 
  42     _LANG_URL 
= r
'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
  43     _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
  44     _NETRC_MACHINE 
= 'youtube' 
  45     # If True it will raise an error if no login info is provided 
  46     _LOGIN_REQUIRED 
= False 
  48     def report_lang(self
): 
  49         """Report attempt to set language.""" 
  50         self
.to_screen(u
'Setting language') 
  52     def _set_language(self
): 
  53         request 
= compat_urllib_request
.Request(self
._LANG
_URL
) 
  56             compat_urllib_request
.urlopen(request
).read() 
  57         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  58             self
._downloader
.report_warning(u
'unable to set language: %s' % compat_str(err
)) 
  63         (username
, password
) = self
._get
_login
_info
() 
  64         # No authentication to be performed 
  66             if self
._LOGIN
_REQUIRED
: 
  67                 raise ExtractorError(u
'No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True) 
  70         request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
) 
  72             login_page 
= compat_urllib_request
.urlopen(request
).read().decode('utf-8') 
  73         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  74             self
._downloader
.report_warning(u
'unable to fetch login page: %s' % compat_str(err
)) 
  77         galx 
= self
._search
_regex
(r
'(?s)<input.+?name="GALX".+?value="(.+?)"', 
  78                                   login_page
, u
'Login GALX parameter') 
  82                 u
'continue': u
'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', 
  86                 u
'PersistentCookie': u
'yes', 
  88                 u
'bgresponse': u
'js_disabled', 
  89                 u
'checkConnection': u
'', 
  90                 u
'checkedDomains': u
'youtube', 
  95                 u
'signIn': u
'Sign in', 
  97                 u
'service': u
'youtube', 
 101         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode 
 103         login_form 
= dict((k
.encode('utf-8'), v
.encode('utf-8')) for k
,v 
in login_form_strs
.items()) 
 104         login_data 
= compat_urllib_parse
.urlencode(login_form
).encode('ascii') 
 105         request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, login_data
) 
 108             login_results 
= compat_urllib_request
.urlopen(request
).read().decode('utf-8') 
 109             if re
.search(r
'(?i)<form[^>]* id="gaia_loginform"', login_results
) is not None: 
 110                 self
._downloader
.report_warning(u
'unable to log in: bad username or password') 
 112         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
 113             self
._downloader
.report_warning(u
'unable to log in: %s' % compat_str(err
)) 
 117     def _confirm_age(self
): 
 120                 'action_confirm':   'Confirm', 
 122         request 
= compat_urllib_request
.Request(self
._AGE
_URL
, compat_urllib_parse
.urlencode(age_form
)) 
 124             self
.report_age_confirmation() 
 125             compat_urllib_request
.urlopen(request
).read().decode('utf-8') 
 126         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
 127             raise ExtractorError(u
'Unable to confirm age: %s' % compat_str(err
)) 
 130     def _real_initialize(self
): 
 131         if self
._downloader 
is None: 
 133         if not self
._set
_language
(): 
 135         if not self
._login
(): 
 140 class YoutubeIE(YoutubeBaseInfoExtractor
, SubtitlesInfoExtractor
): 
 141     IE_DESC 
= u
'YouTube.com' 
 144                          (?:https?://)?                                       # http(s):// (optional) 
 145                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| 
 146                             tube\.majestyc\.net/| 
 147                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains 
 148                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls 
 149                          (?:                                                  # the various things that can precede the ID: 
 150                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ 
 151                              |(?:                                             # or the v= param in all its forms 
 152                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx) 
 153                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! 
 154                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) 
 158                          |youtu\.be/                                          # just youtu.be/xxxx 
 160                      )?                                                       # all until now is optional -> you can pass the naked ID 
 161                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID 
 162                      (?(1).+)?                                                # if we found the ID, everything can follow 
 164     _NEXT_URL_RE 
= r
'[\?&]next_url=([^&]+)' 
 165     # Listed in order of quality 
 166     _available_formats 
= ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13', 
 167                           # Apple HTTP Live Streaming 
 168                           '96', '95', '94', '93', '92', '132', '151', 
 170                           '85', '84', '102', '83', '101', '82', '100', 
 172                           '138', '137', '248', '136', '247', '135', '246', 
 173                           '245', '244', '134', '243', '133', '242', '160', 
 175                           '141', '172', '140', '171', '139', 
 177     _available_formats_prefer_free 
= ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13', 
 178                                       # Apple HTTP Live Streaming 
 179                                       '96', '95', '94', '93', '92', '132', '151', 
 181                                       '85', '102', '84', '101', '83', '100', '82', 
 183                                       '138', '248', '137', '247', '136', '246', '245', 
 184                                       '244', '135', '243', '134', '242', '133', '160', 
 186                                       '172', '141', '171', '140', '139', 
 188     _video_formats_map 
= { 
 189         'flv': ['35', '34', '6', '5'], 
 190         '3gp': ['36', '17', '13'], 
 191         'mp4': ['38', '37', '22', '18'], 
 192         'webm': ['46', '45', '44', '43'], 
 194     _video_extensions 
= { 
 216         # Apple HTTP Live Streaming 
 250     _video_dimensions 
= { 
 332             u
"url":  u
"http://www.youtube.com/watch?v=BaW_jenozKc", 
 333             u
"file":  u
"BaW_jenozKc.mp4", 
 335                 u
"title": u
"youtube-dl test video \"'/\\ä↭𝕐", 
 336                 u
"uploader": u
"Philipp Hagemeister", 
 337                 u
"uploader_id": u
"phihag", 
 338                 u
"upload_date": u
"20121002", 
 339                 u
"description": u
"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." 
 343             u
"url":  u
"http://www.youtube.com/watch?v=UxxajLWwzqY", 
 344             u
"file":  u
"UxxajLWwzqY.mp4", 
 345             u
"note": u
"Test generic use_cipher_signature video (#897)", 
 347                 u
"upload_date": u
"20120506", 
 348                 u
"title": u
"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", 
 349                 u
"description": u
"md5:5b292926389560516e384ac437c0ec07", 
 350                 u
"uploader": u
"Icona Pop", 
 351                 u
"uploader_id": u
"IconaPop" 
 355             u
"url":  u
"https://www.youtube.com/watch?v=07FYdnEawAQ", 
 356             u
"file":  u
"07FYdnEawAQ.mp4", 
 357             u
"note": u
"Test VEVO video with age protection (#956)", 
 359                 u
"upload_date": u
"20130703", 
 360                 u
"title": u
"Justin Timberlake - Tunnel Vision (Explicit)", 
 361                 u
"description": u
"md5:64249768eec3bc4276236606ea996373", 
 362                 u
"uploader": u
"justintimberlakeVEVO", 
 363                 u
"uploader_id": u
"justintimberlakeVEVO" 
 370     def suitable(cls
, url
): 
 371         """Receives a URL and returns True if suitable for this IE.""" 
 372         if YoutubePlaylistIE
.suitable(url
): return False 
 373         return re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
) is not None 
 375     def __init__(self
, *args
, **kwargs
): 
 376         super(YoutubeIE
, self
).__init
__(*args
, **kwargs
) 
 377         self
._player
_cache 
= {} 
 379     def report_video_webpage_download(self
, video_id
): 
 380         """Report attempt to download video webpage.""" 
 381         self
.to_screen(u
'%s: Downloading video webpage' % video_id
) 
 383     def report_video_info_webpage_download(self
, video_id
): 
 384         """Report attempt to download video info webpage.""" 
 385         self
.to_screen(u
'%s: Downloading video info webpage' % video_id
) 
 387     def report_information_extraction(self
, video_id
): 
 388         """Report attempt to extract video information.""" 
 389         self
.to_screen(u
'%s: Extracting video information' % video_id
) 
 391     def report_unavailable_format(self
, video_id
, format
): 
 392         """Report extracted video URL.""" 
 393         self
.to_screen(u
'%s: Format %s not available' % (video_id
, format
)) 
 395     def report_rtmp_download(self
): 
 396         """Indicate the download will use the RTMP protocol.""" 
 397         self
.to_screen(u
'RTMP download detected') 
 399     def _extract_signature_function(self
, video_id
, player_url
, slen
): 
 400         id_m 
= re
.match(r
'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$', 
 402         player_type 
= id_m
.group('ext') 
 403         player_id 
= id_m
.group('id') 
 405         # Read from filesystem cache 
 406         func_id 
= '%s_%s_%d' % (player_type
, player_id
, slen
) 
 407         assert os
.path
.basename(func_id
) == func_id
 
 408         cache_dir 
= get_cachedir(self
._downloader
.params
) 
 410         cache_enabled 
= cache_dir 
is not None 
 412             cache_fn 
= os
.path
.join(os
.path
.expanduser(cache_dir
), 
 416                 with io
.open(cache_fn
, 'r', encoding
='utf-8') as cachef
: 
 417                     cache_spec 
= json
.load(cachef
) 
 418                 return lambda s
: u
''.join(s
[i
] for i 
in cache_spec
) 
 420                 pass  # No cache available 
 422         if player_type 
== 'js': 
 423             code 
= self
._download
_webpage
( 
 424                 player_url
, video_id
, 
 425                 note
=u
'Downloading %s player %s' % (player_type
, player_id
), 
 426                 errnote
=u
'Download of %s failed' % player_url
) 
 427             res 
= self
._parse
_sig
_js
(code
) 
 428         elif player_type 
== 'swf': 
 429             urlh 
= self
._request
_webpage
( 
 430                 player_url
, video_id
, 
 431                 note
=u
'Downloading %s player %s' % (player_type
, player_id
), 
 432                 errnote
=u
'Download of %s failed' % player_url
) 
 434             res 
= self
._parse
_sig
_swf
(code
) 
 436             assert False, 'Invalid player type %r' % player_type
 
 440                 test_string 
= u
''.join(map(compat_chr
, range(slen
))) 
 441                 cache_res 
= res(test_string
) 
 442                 cache_spec 
= [ord(c
) for c 
in cache_res
] 
 444                     os
.makedirs(os
.path
.dirname(cache_fn
)) 
 445                 except OSError as ose
: 
 446                     if ose
.errno 
!= errno
.EEXIST
: 
 448                 write_json_file(cache_spec
, cache_fn
) 
 450                 tb 
= traceback
.format_exc() 
 451                 self
._downloader
.report_warning( 
 452                     u
'Writing cache to %r failed: %s' % (cache_fn
, tb
)) 
 456     def _print_sig_code(self
, func
, slen
): 
 457         def gen_sig_code(idxs
): 
 458             def _genslice(start
, end
, step
): 
 459                 starts 
= u
'' if start 
== 0 else str(start
) 
 460                 ends 
= (u
':%d' % (end
+step
)) if end 
+ step 
>= 0 else u
':' 
 461                 steps 
= u
'' if step 
== 1 else (u
':%d' % step
) 
 462                 return u
's[%s%s%s]' % (starts
, ends
, steps
) 
 465             start 
= '(Never used)'  # Quelch pyflakes warnings - start will be 
 466                                     # set as soon as step is set 
 467             for i
, prev 
in zip(idxs
[1:], idxs
[:-1]): 
 471                     yield _genslice(start
, prev
, step
) 
 474                 if i 
- prev 
in [-1, 1]: 
 479                     yield u
's[%d]' % prev
 
 483                 yield _genslice(start
, i
, step
) 
 485         test_string 
= u
''.join(map(compat_chr
, range(slen
))) 
 486         cache_res 
= func(test_string
) 
 487         cache_spec 
= [ord(c
) for c 
in cache_res
] 
 488         expr_code 
= u
' + '.join(gen_sig_code(cache_spec
)) 
 489         code 
= u
'if len(s) == %d:\n    return %s\n' % (slen
, expr_code
) 
 490         self
.to_screen(u
'Extracted signature function:\n' + code
) 
 492     def _parse_sig_js(self
, jscode
): 
 493         funcname 
= self
._search
_regex
( 
 494             r
'signature=([a-zA-Z]+)', jscode
, 
 495             u
'Initial JS player signature function name') 
 500             return string
.lowercase
.index(varname
) 
 502         def interpret_statement(stmt
, local_vars
, allow_recursion
=20): 
 503             if allow_recursion 
< 0: 
 504                 raise ExtractorError(u
'Recursion limit reached') 
 506             if stmt
.startswith(u
'var '): 
 507                 stmt 
= stmt
[len(u
'var '):] 
 508             ass_m 
= re
.match(r
'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' + 
 509                              r
'=(?P<expr>.*)$', stmt
) 
 511                 if ass_m
.groupdict().get('index'): 
 513                         lvar 
= local_vars
[ass_m
.group('out')] 
 514                         idx 
= interpret_expression(ass_m
.group('index'), 
 515                                                    local_vars
, allow_recursion
) 
 516                         assert isinstance(idx
, int) 
 519                     expr 
= ass_m
.group('expr') 
 522                         local_vars
[ass_m
.group('out')] = val
 
 524                     expr 
= ass_m
.group('expr') 
 525             elif stmt
.startswith(u
'return '): 
 527                 expr 
= stmt
[len(u
'return '):] 
 529                 raise ExtractorError( 
 530                     u
'Cannot determine left side of statement in %r' % stmt
) 
 532             v 
= interpret_expression(expr
, local_vars
, allow_recursion
) 
 535         def interpret_expression(expr
, local_vars
, allow_recursion
): 
 540                 return local_vars
[expr
] 
 542             m 
= re
.match(r
'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr
) 
 544                 member 
= m
.group('member') 
 545                 val 
= local_vars
[m
.group('in')] 
 546                 if member 
== 'split("")': 
 548                 if member 
== 'join("")': 
 550                 if member 
== 'length': 
 552                 if member 
== 'reverse()': 
 554                 slice_m 
= re
.match(r
'slice\((?P<idx>.*)\)', member
) 
 556                     idx 
= interpret_expression( 
 557                         slice_m
.group('idx'), local_vars
, allow_recursion
-1) 
 561                 r
'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr
) 
 563                 val 
= local_vars
[m
.group('in')] 
 564                 idx 
= interpret_expression(m
.group('idx'), local_vars
, 
 568             m 
= re
.match(r
'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr
) 
 570                 a 
= interpret_expression(m
.group('a'), 
 571                                          local_vars
, allow_recursion
) 
 572                 b 
= interpret_expression(m
.group('b'), 
 573                                          local_vars
, allow_recursion
) 
 577                 r
'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr
) 
 579                 fname 
= m
.group('func') 
 580                 if fname 
not in functions
: 
 581                     functions
[fname
] = extract_function(fname
) 
 582                 argvals 
= [int(v
) if v
.isdigit() else local_vars
[v
] 
 583                            for v 
in m
.group('args').split(',')] 
 584                 return functions
[fname
](argvals
) 
 585             raise ExtractorError(u
'Unsupported JS expression %r' % expr
) 
 587         def extract_function(funcname
): 
 589                 r
'function ' + re
.escape(funcname
) + 
 590                 r
'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', 
 592             argnames 
= func_m
.group('args').split(',') 
 595                 local_vars 
= dict(zip(argnames
, args
)) 
 596                 for stmt 
in func_m
.group('code').split(';'): 
 597                     res 
= interpret_statement(stmt
, local_vars
) 
 601         initial_function 
= extract_function(funcname
) 
 602         return lambda s
: initial_function([s
]) 
 604     def _parse_sig_swf(self
, file_contents
): 
 605         if file_contents
[1:3] != b
'WS': 
 606             raise ExtractorError( 
 607                 u
'Not an SWF file; header is %r' % file_contents
[:3]) 
 608         if file_contents
[:1] == b
'C': 
 609             content 
= zlib
.decompress(file_contents
[8:]) 
 611             raise NotImplementedError(u
'Unsupported compression format %r' % 
 614         def extract_tags(content
): 
 616             while pos 
< len(content
): 
 617                 header16 
= struct
.unpack('<H', content
[pos
:pos
+2])[0] 
 619                 tag_code 
= header16 
>> 6 
 620                 tag_len 
= header16 
& 0x3f 
 622                     tag_len 
= struct
.unpack('<I', content
[pos
:pos
+4])[0] 
 624                 assert pos
+tag_len 
<= len(content
) 
 625                 yield (tag_code
, content
[pos
:pos
+tag_len
]) 
 629                         for tag_code
, tag 
in extract_tags(content
) 
 631         p 
= code_tag
.index(b
'\0', 4) + 1 
 632         code_reader 
= io
.BytesIO(code_tag
[p
:]) 
 634         # Parse ABC (AVM2 ByteCode) 
 635         def read_int(reader
=None): 
 643                 b 
= struct
.unpack('<B', buf
)[0] 
 644                 res 
= res | 
((b 
& 0x7f) << shift
) 
 650         def u30(reader
=None): 
 651             res 
= read_int(reader
) 
 652             assert res 
& 0xf0000000 == 0 
 656         def s32(reader
=None): 
 658             if v 
& 0x80000000 != 0: 
 659                 v 
= - ((v ^ 
0xffffffff) + 1) 
 662         def read_string(reader
=None): 
 666             resb 
= reader
.read(slen
) 
 667             assert len(resb
) == slen
 
 668             return resb
.decode('utf-8') 
 670         def read_bytes(count
, reader
=None): 
 673             resb 
= reader
.read(count
) 
 674             assert len(resb
) == count
 
 677         def read_byte(reader
=None): 
 678             resb 
= read_bytes(1, reader
=reader
) 
 679             res 
= struct
.unpack('<B', resb
)[0] 
 682         # minor_version + major_version 
 687         for _c 
in range(1, int_count
): 
 690         for _c 
in range(1, uint_count
): 
 693         read_bytes((double_count
-1) * 8) 
 695         constant_strings 
= [u
''] 
 696         for _c 
in range(1, string_count
): 
 698             constant_strings
.append(s
) 
 699         namespace_count 
= u30() 
 700         for _c 
in range(1, namespace_count
): 
 704         for _c 
in range(1, ns_set_count
): 
 706             for _c2 
in range(count
): 
 708         multiname_count 
= u30() 
 717             0x0e: 2,  # MultinameA 
 718             0x1b: 1,  # MultinameL 
 719             0x1c: 1,  # MultinameLA 
 722         for _c 
in range(1, multiname_count
): 
 724             assert kind 
in MULTINAME_SIZES
, u
'Invalid multiname kind %r' % kind
 
 726                 u30()  # namespace_idx 
 728                 multinames
.append(constant_strings
[name_idx
]) 
 730                 multinames
.append('[MULTINAME kind: %d]' % kind
) 
 731                 for _c2 
in range(MULTINAME_SIZES
[kind
]): 
 736         MethodInfo 
= collections
.namedtuple( 
 738             ['NEED_ARGUMENTS', 'NEED_REST']) 
 740         for method_id 
in range(method_count
): 
 743             for _ 
in range(param_count
): 
 745             u30()  # name index (always 0 for youtube) 
 747             if flags 
& 0x08 != 0: 
 750                 for c 
in range(option_count
): 
 753             if flags 
& 0x80 != 0: 
 754                 # Param names present 
 755                 for _ 
in range(param_count
): 
 757             mi 
= MethodInfo(flags 
& 0x01 != 0, flags 
& 0x04 != 0) 
 758             method_infos
.append(mi
) 
 761         metadata_count 
= u30() 
 762         for _c 
in range(metadata_count
): 
 765             for _c2 
in range(item_count
): 
 769         def parse_traits_info(): 
 770             trait_name_idx 
= u30() 
 771             kind_full 
= read_byte() 
 772             kind 
= kind_full 
& 0x0f 
 773             attrs 
= kind_full 
>> 4 
 775             if kind 
in [0x00, 0x06]:  # Slot or Const 
 777                 u30()  # type_name_idx 
 781             elif kind 
in [0x01, 0x02, 0x03]:  # Method / Getter / Setter 
 784                 methods
[multinames
[trait_name_idx
]] = method_idx
 
 785             elif kind 
== 0x04:  # Class 
 788             elif kind 
== 0x05:  # Function 
 791                 methods
[function_idx
] = multinames
[trait_name_idx
] 
 793                 raise ExtractorError(u
'Unsupported trait kind %d' % kind
) 
 795             if attrs 
& 0x4 != 0:  # Metadata present 
 796                 metadata_count 
= u30() 
 797                 for _c3 
in range(metadata_count
): 
 798                     u30()  # metadata index 
 803         TARGET_CLASSNAME 
= u
'SignatureDecipher' 
 804         searched_idx 
= multinames
.index(TARGET_CLASSNAME
) 
 805         searched_class_id 
= None 
 807         for class_id 
in range(class_count
): 
 809             if name_idx 
== searched_idx
: 
 810                 # We found the class we're looking for! 
 811                 searched_class_id 
= class_id
 
 812             u30()  # super_name idx 
 814             if flags 
& 0x08 != 0:  # Protected namespace is present 
 815                 u30()  # protected_ns_idx 
 817             for _c2 
in range(intrf_count
): 
 821             for _c2 
in range(trait_count
): 
 824         if searched_class_id 
is None: 
 825             raise ExtractorError(u
'Target class %r not found' % 
 830         for class_id 
in range(class_count
): 
 833             for _c2 
in range(trait_count
): 
 834                 trait_methods 
= parse_traits_info() 
 835                 if class_id 
== searched_class_id
: 
 836                     method_names
.update(trait_methods
.items()) 
 837                     method_idxs
.update(dict( 
 839                         for name
, idx 
in trait_methods
.items())) 
 843         for _c 
in range(script_count
): 
 846             for _c2 
in range(trait_count
): 
 850         method_body_count 
= u30() 
 851         Method 
= collections
.namedtuple('Method', ['code', 'local_count']) 
 853         for _c 
in range(method_body_count
): 
 857             u30()  # init_scope_depth 
 858             u30()  # max_scope_depth 
 860             code 
= read_bytes(code_length
) 
 861             if method_idx 
in method_idxs
: 
 862                 m 
= Method(code
, local_count
) 
 863                 methods
[method_idxs
[method_idx
]] = m
 
 864             exception_count 
= u30() 
 865             for _c2 
in range(exception_count
): 
 872             for _c2 
in range(trait_count
): 
 875         assert p 
+ code_reader
.tell() == len(code_tag
) 
 876         assert len(methods
) == len(method_idxs
) 
 878         method_pyfunctions 
= {} 
 880         def extract_function(func_name
): 
 881             if func_name 
in method_pyfunctions
: 
 882                 return method_pyfunctions
[func_name
] 
 883             if func_name 
not in methods
: 
 884                 raise ExtractorError(u
'Cannot find function %r' % func_name
) 
 885             m 
= methods
[func_name
] 
 888                 registers 
= ['(this)'] + list(args
) + [None] * m
.local_count
 
 890                 coder 
= io
.BytesIO(m
.code
) 
 892                     opcode 
= struct
.unpack('!B', coder
.read(1))[0] 
 893                     if opcode 
== 36:  # pushbyte 
 894                         v 
= struct
.unpack('!B', coder
.read(1))[0] 
 896                     elif opcode 
== 44:  # pushstring 
 898                         stack
.append(constant_strings
[idx
]) 
 899                     elif opcode 
== 48:  # pushscope 
 900                         # We don't implement the scope register, so we'll just 
 901                         # ignore the popped value 
 903                     elif opcode 
== 70:  # callproperty 
 905                         mname 
= multinames
[index
] 
 906                         arg_count 
= u30(coder
) 
 907                         args 
= list(reversed( 
 908                             [stack
.pop() for _ 
in range(arg_count
)])) 
 910                         if mname 
== u
'split': 
 911                             assert len(args
) == 1 
 912                             assert isinstance(args
[0], compat_str
) 
 913                             assert isinstance(obj
, compat_str
) 
 917                                 res 
= obj
.split(args
[0]) 
 919                         elif mname 
== u
'slice': 
 920                             assert len(args
) == 1 
 921                             assert isinstance(args
[0], int) 
 922                             assert isinstance(obj
, list) 
 925                         elif mname 
== u
'join': 
 926                             assert len(args
) == 1 
 927                             assert isinstance(args
[0], compat_str
) 
 928                             assert isinstance(obj
, list) 
 929                             res 
= args
[0].join(obj
) 
 931                         elif mname 
in method_pyfunctions
: 
 932                             stack
.append(method_pyfunctions
[mname
](args
)) 
 934                             raise NotImplementedError( 
 935                                 u
'Unsupported property %r on %r' 
 937                     elif opcode 
== 72:  # returnvalue 
 940                     elif opcode 
== 79:  # callpropvoid 
 942                         mname 
= multinames
[index
] 
 943                         arg_count 
= u30(coder
) 
 944                         args 
= list(reversed( 
 945                             [stack
.pop() for _ 
in range(arg_count
)])) 
 947                         if mname 
== u
'reverse': 
 948                             assert isinstance(obj
, list) 
 951                             raise NotImplementedError( 
 952                                 u
'Unsupported (void) property %r on %r' 
 954                     elif opcode 
== 93:  # findpropstrict 
 956                         mname 
= multinames
[index
] 
 957                         res 
= extract_function(mname
) 
 959                     elif opcode 
== 97:  # setproperty 
 964                         assert isinstance(obj
, list) 
 965                         assert isinstance(idx
, int) 
 967                     elif opcode 
== 98:  # getlocal 
 969                         stack
.append(registers
[index
]) 
 970                     elif opcode 
== 99:  # setlocal 
 973                         registers
[index
] = value
 
 974                     elif opcode 
== 102:  # getproperty 
 976                         pname 
= multinames
[index
] 
 977                         if pname 
== u
'length': 
 979                             assert isinstance(obj
, list) 
 980                             stack
.append(len(obj
)) 
 981                         else:  # Assume attribute access 
 983                             assert isinstance(idx
, int) 
 985                             assert isinstance(obj
, list) 
 986                             stack
.append(obj
[idx
]) 
 987                     elif opcode 
== 128:  # coerce 
 989                     elif opcode 
== 133:  # coerce_s 
 990                         assert isinstance(stack
[-1], (type(None), compat_str
)) 
 991                     elif opcode 
== 164:  # modulo 
 994                         res 
= value1 
% value2
 
 996                     elif opcode 
== 208:  # getlocal_0 
 997                         stack
.append(registers
[0]) 
 998                     elif opcode 
== 209:  # getlocal_1 
 999                         stack
.append(registers
[1]) 
1000                     elif opcode 
== 210:  # getlocal_2 
1001                         stack
.append(registers
[2]) 
1002                     elif opcode 
== 211:  # getlocal_3 
1003                         stack
.append(registers
[3]) 
1004                     elif opcode 
== 214:  # setlocal_2 
1005                         registers
[2] = stack
.pop() 
1006                     elif opcode 
== 215:  # setlocal_3 
1007                         registers
[3] = stack
.pop() 
1009                         raise NotImplementedError( 
1010                             u
'Unsupported opcode %d' % opcode
) 
1012             method_pyfunctions
[func_name
] = resfunc
 
1015         initial_function 
= extract_function(u
'decipher') 
1016         return lambda s
: initial_function([s
]) 
1018     def _decrypt_signature(self
, s
, video_id
, player_url
, age_gate
=False): 
1019         """Turn the encrypted s field into a working signature""" 
1021         if player_url 
is not None: 
1023                 player_id 
= (player_url
, len(s
)) 
1024                 if player_id 
not in self
._player
_cache
: 
1025                     func 
= self
._extract
_signature
_function
( 
1026                         video_id
, player_url
, len(s
) 
1028                     self
._player
_cache
[player_id
] = func
 
1029                 func 
= self
._player
_cache
[player_id
] 
1030                 if self
._downloader
.params
.get('youtube_print_sig_code'): 
1031                     self
._print
_sig
_code
(func
, len(s
)) 
1034                 tb 
= traceback
.format_exc() 
1035                 self
._downloader
.report_warning( 
1036                     u
'Automatic signature extraction failed: ' + tb
) 
1038             self
._downloader
.report_warning( 
1039                 u
'Warning: Falling back to static signature algorithm') 
1041         return self
._static
_decrypt
_signature
( 
1042             s
, video_id
, player_url
, age_gate
) 
1044     def _static_decrypt_signature(self
, s
, video_id
, player_url
, age_gate
): 
1046             # The videos with age protection use another player, so the 
1047             # algorithms can be different. 
1049                 return s
[2:63] + s
[82] + s
[64:82] + s
[63] 
1052             return s
[86:29:-1] + s
[88] + s
[28:5:-1] 
1054             return s
[25] + s
[3:25] + s
[0] + s
[26:42] + s
[79] + s
[43:79] + s
[91] + s
[80:83] 
1056             return s
[84:27:-1] + s
[86] + s
[26:5:-1] 
1058             return s
[25] + s
[3:25] + s
[2] + s
[26:40] + s
[77] + s
[41:77] + s
[89] + s
[78:81] 
1060             return s
[84:78:-1] + s
[87] + s
[77:60:-1] + s
[0] + s
[59:3:-1] 
1062             return s
[7:28] + s
[87] + s
[29:45] + s
[55] + s
[46:55] + s
[2] + s
[56:87] + s
[28] 
1064             return s
[6:27] + s
[4] + s
[28:39] + s
[27] + s
[40:59] + s
[2] + s
[60:] 
1066             return s
[80:72:-1] + s
[16] + s
[71:39:-1] + s
[72] + s
[38:16:-1] + s
[82] + s
[15::-1] 
1068             return s
[3:11] + s
[0] + s
[12:55] + s
[84] + s
[56:84] 
1070             return s
[78:70:-1] + s
[14] + s
[69:37:-1] + s
[70] + s
[36:14:-1] + s
[80] + s
[:14][::-1] 
1072             return s
[80:63:-1] + s
[0] + s
[62:0:-1] + s
[63] 
1074             return s
[80:37:-1] + s
[7] + s
[36:7:-1] + s
[0] + s
[6:0:-1] + s
[37] 
1076             return s
[56] + s
[79:56:-1] + s
[41] + s
[55:41:-1] + s
[80] + s
[40:34:-1] + s
[0] + s
[33:29:-1] + s
[34] + s
[28:9:-1] + s
[29] + s
[8:0:-1] + s
[9] 
1078             return s
[1:19] + s
[0] + s
[20:68] + s
[19] + s
[69:80] 
1080             return s
[54] + s
[77:54:-1] + s
[39] + s
[53:39:-1] + s
[78] + s
[38:34:-1] + s
[0] + s
[33:29:-1] + s
[34] + s
[28:9:-1] + s
[29] + s
[8:0:-1] + s
[9] 
1083             raise ExtractorError(u
'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s
))) 
    def _get_available_subtitles(self, video_id, webpage):
        """Return a dict {lang_code: timedtext_url} of manual subtitles.

        Returns an empty dict (after a warning) when the subtitle list cannot
        be downloaded or the video has no subtitles.
        """
        try:
            sub_list = self._download_webpage(
                'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            # Best effort: missing subtitles should not abort the extraction.
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)

        sub_lang_list = {}
        for l in lang_list:
            # l is a (name, lang_code) tuple from the regex above.
            lang = l[1]
            params = compat_urllib_parse.urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': self._downloader.params.get('subtitlesformat'),
                'name': l[0].encode('utf-8'),
            })
            url = u'http://www.youtube.com/api/timedtext?' + params
            sub_lang_list[lang] = url
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
            return {}
        return sub_lang_list
    def _get_available_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process.

        Returns a dict {lang_code: caption_url} of automatic (ASR) captions,
        or an empty dict (after a warning) when none are available.
        """
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            # We get the available subtitles
            # NOTE(review): list_params keys reconstructed — confirm against
            # the timedtext list API ('type', 'tlangs', 'asrs').
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            list_page = self._download_webpage(list_url, video_id)
            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
            original_lang_node = caption_list.find('track')
            # Only an 'asr' track can be machine-translated into other langs.
            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']

            sub_lang_list = {}
            # We build the URL of each translation target.
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                params = compat_urllib_parse.urlencode({
                    'lang': original_lang,
                    'tlang': sub_lang,
                    'fmt': sub_format,
                    'ts': timestamp,
                    'kind': 'asr',
                })
                sub_lang_list[sub_lang] = caption_url + '&' + params
            return sub_lang_list
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
    def _print_formats(self, formats):
        """Print one line per itag: extension, dimensions and special notes.

        Lookup tables (_video_extensions, _video_dimensions, _special_itags)
        are class-level dicts defined elsewhere in this extractor.
        """
        print('Available formats:')
        for x in formats:
            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                        self._video_dimensions.get(x, '???'),
                                        ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
    def _extract_id(self, url):
        """Return the video id matched by _VALID_URL, or raise on no match."""
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # Group 2 of _VALID_URL captures the video id.
        video_id = mobj.group(2)
        return video_id
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        """
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            # Cap quality at format_limit: keep only formats at or below it.
            format_list = available_formats[available_formats.index(format_limit):]
        else:
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            # --list-formats: print and bail out without returning URLs.
            self._print_formats(existing_formats)
            return
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    # rf is an extension alias ('mp4', 'flv', ...): try its
                    # itags in quality order.
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        continue
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
    def _extract_from_m3u8(self, manifest_url, video_id):
        """Download an m3u8 manifest and return {itag: format_url}.

        The itag is parsed out of each variant URL's 'itag/<n>/' path segment.
        """
        url_map = {}
        def _get_urls(_manifest):
            # Non-comment, non-empty lines of an m3u8 are the variant URLs.
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
                          lines)
            return urls
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
        return url_map
1236     def _extract_annotations(self
, video_id
): 
1237         url 
= 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
 
1238         return self
._download
_webpage
(url
, video_id
, note
=u
'Searching for annotations.', errnote
=u
'Unable to download video annotations.') 
    def _real_extract(self, url):
        """Extract one or more downloadable format dicts for a watch URL."""
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
        try:
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            # Unescape the backslash-escaped URL from the JS config.
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            # NOTE(review): the remaining keys of this dict were reconstructed
            # ('el', 'gl', 'hl', 'asv', 'sts') — confirm against upstream.
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                                                  'el': 'player_embedded',
                                                  'gl': 'US',
                                                  'hl': 'en',
                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                                                  'asv': 3,
                                                  'sts': '1588',
                                                  })
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    note=False,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
        else:
            age_gate = False
            # Try several 'el' values until one response contains a token.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                        note=False,
                                        errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
                    break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
            else:
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
        else:
            self._downloader.report_warning(u'unable to extract uploader nickname')

        # title
        if 'title' in video_info:
            video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
        else:
            self._downloader.report_warning(u'Unable to extract video title')
            video_title = u'_'

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
            video_thumbnail = None
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = None
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        if mobj is not None:
            # Normalise separators to spaces before parsing the date.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = u''

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, video_webpage)
            return

        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
            video_duration = ''
        else:
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
                video_annotations = self._extract_annotations(video_id)

        # Decide which formats to download

        try:
            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            if not mobj:
                raise ValueError('Could not find vevo ID')
            info = json.loads(mobj.group(1))
            args = info['args']
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
            if 'url_encoded_fmt_stream_map' not in args:
                raise ValueError(u'No stream_map present')  # caught below
            re_signature = re.compile(r'[&,]s=')
            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
            if m_s is not None:
                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
            m_s = re_signature.search(args.get('adaptive_fmts', u''))
            if m_s is not None:
                if 'adaptive_fmts' in video_info:
                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
                else:
                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
        except ValueError:
            # Missing player config / stream map: fall through and rely on
            # whatever get_video_info returned.
            pass

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Plain (non-encrypted) signature.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        # Encrypted signature that must be decrypted first.
                        encrypted_sig = url_data['s'][0]
                        if self._downloader.params.get('verbose'):
                            if age_gate:
                                if player_url is None:
                                    player_version = 'unknown'
                                else:
                                    player_version = self._search_regex(
                                        r'-(.+)\.swf$', player_url,
                                        u'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                            else:
                                player_version = self._search_regex(
                                    r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
                                player_desc = u'html5 player %s' % player_version

                            parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

                        if not age_gate:
                            jsplayer_url_json = self._search_regex(
                                r'"assets":.+?"js":\s*("[^"]+")',
                                video_webpage, u'JS player URL')
                            player_url = json.loads(jsplayer_url_json)

                        signature = self._decrypt_signature(
                            encrypted_sig, video_id, player_url, age_gate)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
                # _get_video_url_list returned None (--list-formats mode).
                return
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
                return
        else:
            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

        results = []
        for itag, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(itag, 'flv')

            video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
                                              self._video_dimensions.get(itag, '???'),
                                              ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')

            results.append({
                'id':       video_id,
                'url':      video_real_url,
                'uploader': video_uploader,
                'uploader_id': video_uploader_id,
                'upload_date':  upload_date,
                'title':    video_title,
                'ext':      video_extension,
                'format':   video_format,
                'thumbnail':    video_thumbnail,
                'description':  video_description,
                'player_url':   player_url,
                'subtitles':    video_subtitles,
                'duration':     video_duration,
                'age_limit':    18 if age_gate else 0,
                'annotations':  video_annotations,
                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
            })
        return results
class YoutubePlaylistIE(InfoExtractor):
    """Extractor for YouTube playlists (gdata API based)."""
    IE_DESC = u'YouTube.com playlists'
    # NOTE(review): middle alternatives of this verbose regex reconstructed —
    # confirm against upstream.
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    # gdata API page size
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
            else:
                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Download playlist videos from API
        videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # gdata API refuses start indices >= 1000.
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                # Keep the playlist position so we can sort afterwards.
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))

        # Sort by playlist position, then drop the index.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    """Extractor for all videos of a YouTube channel."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # Present in the AJAX response while more pages exist.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the unique video ids linked from a channel page, in order."""
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []

        # Download all channel pages using the json-based channel_ajax query
        for pagenum in itertools.count(1):
            url = self._MORE_PAGES_URL % (pagenum, channel_id)
            page = self._download_webpage(url, channel_id,
                                          u'Downloading page #%s' % pagenum)

            page = json.loads(page)

            ids_in_page = self.extract_videos_from_page(page['content_html'])
            video_ids.extend(ids_in_page)

            # Stop when the response no longer offers a "load more" widget.
            if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    """Extractor for all uploads of a YouTube user (gdata API based)."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # gdata API page size
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors: this regex is very permissive and would match too much.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # Extract video identifiers
            ids_in_page = []
            for entry in response['feed']['entry']:
                # The id field is a URL; the video id is its last path segment.
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor using the gdata JSON-C API ('ytsearch' keyword)."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    # Upper bound on results the API will serve for one query.
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never ask for more results than the API reports to exist.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same search as YoutubeSearchIE, but ordered by upload date."""
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
class YoutubeShowIE(InfoExtractor):
    """Extractor for show pages: yields one playlist entry per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
1744 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor
): 
1746     Base class for extractors that fetch info from 
1747     http://www.youtube.com/feed_ajax 
1748     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. 
1750     _LOGIN_REQUIRED 
= True 
1752     # use action_load_personal_feed instead of action_load_system_feed 
1753     _PERSONAL_FEED 
= False 
1756     def _FEED_TEMPLATE(self
): 
1757         action 
= 'action_load_system_feed' 
1758         if self
._PERSONAL
_FEED
: 
1759             action 
= 'action_load_personal_feed' 
1760         return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action
, self
._FEED
_NAME
) 
1764         return u
'youtube:%s' % self
._FEED
_NAME
 
1766     def _real_initialize(self
): 
1769     def _real_extract(self
, url
): 
1771         # The step argument is available only in 2.7 or higher 
1772         for i 
in itertools
.count(0): 
1773             paging 
= i
*self
._PAGING
_STEP
 
1774             info 
= self
._download
_webpage
(self
._FEED
_TEMPLATE 
% paging
, 
1775                                           u
'%s feed' % self
._FEED
_NAME
, 
1776                                           u
'Downloading page %s' % i
) 
1777             info 
= json
.loads(info
) 
1778             feed_html 
= info
['feed_html'] 
1779             m_ids 
= re
.finditer(r
'"/watch\?v=(.*?)["&]', feed_html
) 
1780             ids 
= orderedSet(m
.group(1) for m 
in m_ids
) 
1781             feed_entries
.extend(self
.url_result(id, 'Youtube') for id in ids
) 
1782             if info
['paging'] is None: 
1784         return self
.playlist_result(feed_entries
, playlist_title
=self
._PLAYLIST
_TITLE
) 
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's "Watch Later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'

    # Watch Later is user-specific, so request the personal-feed action.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of its backing playlist;
        # hand that id over to the playlist extractor.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
1818 class YoutubeTruncatedURLIE(InfoExtractor
): 
1819     IE_NAME 
= 'youtube:truncated_url' 
1820     IE_DESC 
= False  # Do not list 
1821     _VALID_URL 
= r
'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$' 
1823     def _real_extract(self
, url
): 
1824         raise ExtractorError( 
1825             u
'Did you forget to quote the URL? Remember that & is a meta ' 
1826             u
'character in most shells, so you want to put the URL in quotes, ' 
1828             u
'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\'' 
1829             u
' (or simply  youtube-dl BaW_jenozKc  ).',