14 import xml
.etree
.ElementTree
 
  17 from .common 
import InfoExtractor
, SearchInfoExtractor
 
  18 from .subtitles 
import SubtitlesInfoExtractor
 
  25     compat_urllib_request
, 
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Endpoint used by _login() to authenticate a Google account.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # Fetched by _set_language(); the hl=en/gl=US params pin the site to
    # English/US so later page scraping sees a predictable layout.
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    # Posted to by _confirm_age() to get past the age-verification page.
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name for stored credentials (presumably looked up in ~/.netrc
    # by _get_login_info(), which lives outside this chunk — TODO confirm).
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
  48     def report_lang(self
): 
  49         """Report attempt to set language.""" 
  50         self
.to_screen(u
'Setting language') 
  52     def _set_language(self
): 
  53         request 
= compat_urllib_request
.Request(self
._LANG
_URL
) 
  56             compat_urllib_request
.urlopen(request
).read() 
  57         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  58             self
._downloader
.report_warning(u
'unable to set language: %s' % compat_str(err
)) 
  63         (username
, password
) = self
._get
_login
_info
() 
  64         # No authentication to be performed 
  66             if self
._LOGIN
_REQUIRED
: 
  67                 raise ExtractorError(u
'No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True) 
  70         request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
) 
  72             login_page 
= compat_urllib_request
.urlopen(request
).read().decode('utf-8') 
  73         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  74             self
._downloader
.report_warning(u
'unable to fetch login page: %s' % compat_str(err
)) 
  79         match 
= re
.search(re
.compile(r
'<input.+?name="GALX".+?value="(.+?)"', re
.DOTALL
), login_page
) 
  82         match 
= re
.search(re
.compile(r
'<input.+?name="dsh".+?value="(.+?)"', re
.DOTALL
), login_page
) 
  88                 u
'continue': u
'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', 
  92                 u
'PersistentCookie': u
'yes', 
  94                 u
'bgresponse': u
'js_disabled', 
  95                 u
'checkConnection': u
'', 
  96                 u
'checkedDomains': u
'youtube', 
 102                 u
'signIn': u
'Sign in', 
 104                 u
'service': u
'youtube', 
 108         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode 
 110         login_form 
= dict((k
.encode('utf-8'), v
.encode('utf-8')) for k
,v 
in login_form_strs
.items()) 
 111         login_data 
= compat_urllib_parse
.urlencode(login_form
).encode('ascii') 
 112         request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, login_data
) 
 115             login_results 
= compat_urllib_request
.urlopen(request
).read().decode('utf-8') 
 116             if re
.search(r
'(?i)<form[^>]* id="gaia_loginform"', login_results
) is not None: 
 117                 self
._downloader
.report_warning(u
'unable to log in: bad username or password') 
 119         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
 120             self
._downloader
.report_warning(u
'unable to log in: %s' % compat_str(err
)) 
 124     def _confirm_age(self
): 
 127                 'action_confirm':   'Confirm', 
 129         request 
= compat_urllib_request
.Request(self
._AGE
_URL
, compat_urllib_parse
.urlencode(age_form
)) 
 131             self
.report_age_confirmation() 
 132             compat_urllib_request
.urlopen(request
).read().decode('utf-8') 
 133         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
 134             raise ExtractorError(u
'Unable to confirm age: %s' % compat_str(err
)) 
 137     def _real_initialize(self
): 
 138         if self
._downloader 
is None: 
 140         if not self
._set
_language
(): 
 142         if not self
._login
(): 
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    # Human-readable description of this extractor.
    IE_DESC = u'YouTube.com'
 151                          (?:https?://)?                                       # http(s):// (optional) 
 152                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| 
 153                             tube\.majestyc\.net/| 
 154                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains 
 155                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls 
 156                          (?:                                                  # the various things that can precede the ID: 
 157                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ 
 158                              |(?:                                             # or the v= param in all its forms 
 159                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx) 
 160                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! 
 161                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) 
 165                          |youtu\.be/                                          # just youtu.be/xxxx 
 167                      )?                                                       # all until now is optional -> you can pass the naked ID 
 168                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID 
 169                      (?(1).+)?                                                # if we found the ID, everything can follow 
 171     _NEXT_URL_RE 
= r
'[\?&]next_url=([^&]+)' 
 172     # Listed in order of quality 
 173     _available_formats 
= ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13', 
 174                           # Apple HTTP Live Streaming 
 175                           '96', '95', '94', '93', '92', '132', '151', 
 177                           '85', '84', '102', '83', '101', '82', '100', 
 179                           '138', '137', '248', '136', '247', '135', '246', 
 180                           '245', '244', '134', '243', '133', '242', '160', 
 182                           '141', '172', '140', '171', '139', 
 184     _available_formats_prefer_free 
= ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13', 
 185                                       # Apple HTTP Live Streaming 
 186                                       '96', '95', '94', '93', '92', '132', '151', 
 188                                       '85', '102', '84', '101', '83', '100', '82', 
 190                                       '138', '248', '137', '247', '136', '246', '245', 
 191                                       '244', '135', '243', '134', '242', '133', '160', 
 193                                       '172', '141', '171', '140', '139', 
 195     _video_formats_map 
= { 
 196         'flv': ['35', '34', '6', '5'], 
 197         '3gp': ['36', '17', '13'], 
 198         'mp4': ['38', '37', '22', '18'], 
 199         'webm': ['46', '45', '44', '43'], 
 201     _video_extensions 
= { 
 223         # Apple HTTP Live Streaming 
 255     _video_dimensions 
= { 
 337             u
"url":  u
"http://www.youtube.com/watch?v=BaW_jenozKc", 
 338             u
"file":  u
"BaW_jenozKc.mp4", 
 340                 u
"title": u
"youtube-dl test video \"'/\\ä↭𝕐", 
 341                 u
"uploader": u
"Philipp Hagemeister", 
 342                 u
"uploader_id": u
"phihag", 
 343                 u
"upload_date": u
"20121002", 
 344                 u
"description": u
"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." 
 348             u
"url":  u
"http://www.youtube.com/watch?v=1ltcDfZMA3U", 
 349             u
"file":  u
"1ltcDfZMA3U.flv", 
 350             u
"note": u
"Test VEVO video (#897)", 
 352                 u
"upload_date": u
"20070518", 
 353                 u
"title": u
"Maps - It Will Find You", 
 354                 u
"description": u
"Music video by Maps performing It Will Find You.", 
 355                 u
"uploader": u
"MuteUSA", 
 356                 u
"uploader_id": u
"MuteUSA" 
 360             u
"url":  u
"http://www.youtube.com/watch?v=UxxajLWwzqY", 
 361             u
"file":  u
"UxxajLWwzqY.mp4", 
 362             u
"note": u
"Test generic use_cipher_signature video (#897)", 
 364                 u
"upload_date": u
"20120506", 
 365                 u
"title": u
"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", 
 366                 u
"description": u
"md5:5b292926389560516e384ac437c0ec07", 
 367                 u
"uploader": u
"Icona Pop", 
 368                 u
"uploader_id": u
"IconaPop" 
 372             u
"url":  u
"https://www.youtube.com/watch?v=07FYdnEawAQ", 
 373             u
"file":  u
"07FYdnEawAQ.mp4", 
 374             u
"note": u
"Test VEVO video with age protection (#956)", 
 376                 u
"upload_date": u
"20130703", 
 377                 u
"title": u
"Justin Timberlake - Tunnel Vision (Explicit)", 
 378                 u
"description": u
"md5:64249768eec3bc4276236606ea996373", 
 379                 u
"uploader": u
"justintimberlakeVEVO", 
 380                 u
"uploader_id": u
"justintimberlakeVEVO" 
 387     def suitable(cls
, url
): 
 388         """Receives a URL and returns True if suitable for this IE.""" 
 389         if YoutubePlaylistIE
.suitable(url
): return False 
 390         return re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
) is not None 
    def __init__(self, *args, **kwargs):
        # Delegate standard setup to the parent extractors.
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Cache of extracted signature-decryption functions, keyed by
        # (player_url, signature length) — see _decrypt_signature, which
        # populates it so each player is only downloaded/parsed once.
        self._player_cache = {}
 396     def report_video_webpage_download(self
, video_id
): 
 397         """Report attempt to download video webpage.""" 
 398         self
.to_screen(u
'%s: Downloading video webpage' % video_id
) 
 400     def report_video_info_webpage_download(self
, video_id
): 
 401         """Report attempt to download video info webpage.""" 
 402         self
.to_screen(u
'%s: Downloading video info webpage' % video_id
) 
 404     def report_information_extraction(self
, video_id
): 
 405         """Report attempt to extract video information.""" 
 406         self
.to_screen(u
'%s: Extracting video information' % video_id
) 
 408     def report_unavailable_format(self
, video_id
, format
): 
 409         """Report extracted video URL.""" 
 410         self
.to_screen(u
'%s: Format %s not available' % (video_id
, format
)) 
 412     def report_rtmp_download(self
): 
 413         """Indicate the download will use the RTMP protocol.""" 
 414         self
.to_screen(u
'RTMP download detected') 
 416     def _extract_signature_function(self
, video_id
, player_url
, slen
): 
 417         id_m 
= re
.match(r
'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$', 
 419         player_type 
= id_m
.group('ext') 
 420         player_id 
= id_m
.group('id') 
 422         # Read from filesystem cache 
 423         func_id 
= '%s_%s_%d' % (player_type
, player_id
, slen
) 
 424         assert os
.path
.basename(func_id
) == func_id
 
 425         cache_dir 
= get_cachedir(self
._downloader
.params
) 
 427         cache_enabled 
= cache_dir 
is not None 
 429             cache_fn 
= os
.path
.join(os
.path
.expanduser(cache_dir
), 
 433                 with io
.open(cache_fn
, 'r', encoding
='utf-8') as cachef
: 
 434                     cache_spec 
= json
.load(cachef
) 
 435                 return lambda s
: u
''.join(s
[i
] for i 
in cache_spec
) 
 437                 pass  # No cache available 
 439         if player_type 
== 'js': 
 440             code 
= self
._download
_webpage
( 
 441                 player_url
, video_id
, 
 442                 note
=u
'Downloading %s player %s' % (player_type
, player_id
), 
 443                 errnote
=u
'Download of %s failed' % player_url
) 
 444             res 
= self
._parse
_sig
_js
(code
) 
 445         elif player_type 
== 'swf': 
 446             urlh 
= self
._request
_webpage
( 
 447                 player_url
, video_id
, 
 448                 note
=u
'Downloading %s player %s' % (player_type
, player_id
), 
 449                 errnote
=u
'Download of %s failed' % player_url
) 
 451             res 
= self
._parse
_sig
_swf
(code
) 
 453             assert False, 'Invalid player type %r' % player_type
 
 457                 test_string 
= u
''.join(map(compat_chr
, range(slen
))) 
 458                 cache_res 
= res(test_string
) 
 459                 cache_spec 
= [ord(c
) for c 
in cache_res
] 
 461                     os
.makedirs(os
.path
.dirname(cache_fn
)) 
 462                 except OSError as ose
: 
 463                     if ose
.errno 
!= errno
.EEXIST
: 
 465                 write_json_file(cache_spec
, cache_fn
) 
 467                 tb 
= traceback
.format_exc() 
 468                 self
._downloader
.report_warning( 
 469                     u
'Writing cache to %r failed: %s' % (cache_fn
, tb
)) 
 473     def _print_sig_code(self
, func
, slen
): 
 474         def gen_sig_code(idxs
): 
 475             def _genslice(start
, end
, step
): 
 476                 starts 
= u
'' if start 
== 0 else str(start
) 
 477                 ends 
= (u
':%d' % (end
+step
)) if end 
+ step 
>= 0 else u
':' 
 478                 steps 
= u
'' if step 
== 1 else (u
':%d' % step
) 
 479                 return u
's[%s%s%s]' % (starts
, ends
, steps
) 
 482             start 
= '(Never used)'  # Quelch pyflakes warnings - start will be 
 483                                     # set as soon as step is set 
 484             for i
, prev 
in zip(idxs
[1:], idxs
[:-1]): 
 488                     yield _genslice(start
, prev
, step
) 
 491                 if i 
- prev 
in [-1, 1]: 
 496                     yield u
's[%d]' % prev
 
 500                 yield _genslice(start
, i
, step
) 
 502         test_string 
= u
''.join(map(compat_chr
, range(slen
))) 
 503         cache_res 
= func(test_string
) 
 504         cache_spec 
= [ord(c
) for c 
in cache_res
] 
 505         expr_code 
= u
' + '.join(gen_sig_code(cache_spec
)) 
 506         code 
= u
'if len(s) == %d:\n    return %s\n' % (slen
, expr_code
) 
 507         self
.to_screen(u
'Extracted signature function:\n' + code
) 
 509     def _parse_sig_js(self
, jscode
): 
 510         funcname 
= self
._search
_regex
( 
 511             r
'signature=([a-zA-Z]+)', jscode
, 
 512             u
'Initial JS player signature function name') 
 517             return string
.lowercase
.index(varname
) 
 519         def interpret_statement(stmt
, local_vars
, allow_recursion
=20): 
 520             if allow_recursion 
< 0: 
 521                 raise ExtractorError(u
'Recursion limit reached') 
 523             if stmt
.startswith(u
'var '): 
 524                 stmt 
= stmt
[len(u
'var '):] 
 525             ass_m 
= re
.match(r
'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' + 
 526                              r
'=(?P<expr>.*)$', stmt
) 
 528                 if ass_m
.groupdict().get('index'): 
 530                         lvar 
= local_vars
[ass_m
.group('out')] 
 531                         idx 
= interpret_expression(ass_m
.group('index'), 
 532                                                    local_vars
, allow_recursion
) 
 533                         assert isinstance(idx
, int) 
 536                     expr 
= ass_m
.group('expr') 
 539                         local_vars
[ass_m
.group('out')] = val
 
 541                     expr 
= ass_m
.group('expr') 
 542             elif stmt
.startswith(u
'return '): 
 544                 expr 
= stmt
[len(u
'return '):] 
 546                 raise ExtractorError( 
 547                     u
'Cannot determine left side of statement in %r' % stmt
) 
 549             v 
= interpret_expression(expr
, local_vars
, allow_recursion
) 
 552         def interpret_expression(expr
, local_vars
, allow_recursion
): 
 557                 return local_vars
[expr
] 
 559             m 
= re
.match(r
'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr
) 
 561                 member 
= m
.group('member') 
 562                 val 
= local_vars
[m
.group('in')] 
 563                 if member 
== 'split("")': 
 565                 if member 
== 'join("")': 
 567                 if member 
== 'length': 
 569                 if member 
== 'reverse()': 
 571                 slice_m 
= re
.match(r
'slice\((?P<idx>.*)\)', member
) 
 573                     idx 
= interpret_expression( 
 574                         slice_m
.group('idx'), local_vars
, allow_recursion
-1) 
 578                 r
'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr
) 
 580                 val 
= local_vars
[m
.group('in')] 
 581                 idx 
= interpret_expression(m
.group('idx'), local_vars
, 
 585             m 
= re
.match(r
'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr
) 
 587                 a 
= interpret_expression(m
.group('a'), 
 588                                          local_vars
, allow_recursion
) 
 589                 b 
= interpret_expression(m
.group('b'), 
 590                                          local_vars
, allow_recursion
) 
 594                 r
'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr
) 
 596                 fname 
= m
.group('func') 
 597                 if fname 
not in functions
: 
 598                     functions
[fname
] = extract_function(fname
) 
 599                 argvals 
= [int(v
) if v
.isdigit() else local_vars
[v
] 
 600                            for v 
in m
.group('args').split(',')] 
 601                 return functions
[fname
](argvals
) 
 602             raise ExtractorError(u
'Unsupported JS expression %r' % expr
) 
 604         def extract_function(funcname
): 
 606                 r
'function ' + re
.escape(funcname
) + 
 607                 r
'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', 
 609             argnames 
= func_m
.group('args').split(',') 
 612                 local_vars 
= dict(zip(argnames
, args
)) 
 613                 for stmt 
in func_m
.group('code').split(';'): 
 614                     res 
= interpret_statement(stmt
, local_vars
) 
 618         initial_function 
= extract_function(funcname
) 
 619         return lambda s
: initial_function([s
]) 
 621     def _parse_sig_swf(self
, file_contents
): 
 622         if file_contents
[1:3] != b
'WS': 
 623             raise ExtractorError( 
 624                 u
'Not an SWF file; header is %r' % file_contents
[:3]) 
 625         if file_contents
[:1] == b
'C': 
 626             content 
= zlib
.decompress(file_contents
[8:]) 
 628             raise NotImplementedError(u
'Unsupported compression format %r' % 
 631         def extract_tags(content
): 
 633             while pos 
< len(content
): 
 634                 header16 
= struct
.unpack('<H', content
[pos
:pos
+2])[0] 
 636                 tag_code 
= header16 
>> 6 
 637                 tag_len 
= header16 
& 0x3f 
 639                     tag_len 
= struct
.unpack('<I', content
[pos
:pos
+4])[0] 
 641                 assert pos
+tag_len 
<= len(content
) 
 642                 yield (tag_code
, content
[pos
:pos
+tag_len
]) 
 646                         for tag_code
, tag 
in extract_tags(content
) 
 648         p 
= code_tag
.index(b
'\0', 4) + 1 
 649         code_reader 
= io
.BytesIO(code_tag
[p
:]) 
 651         # Parse ABC (AVM2 ByteCode) 
 652         def read_int(reader
=None): 
 660                 b 
= struct
.unpack('<B', buf
)[0] 
 661                 res 
= res | 
((b 
& 0x7f) << shift
) 
 667         def u30(reader
=None): 
 668             res 
= read_int(reader
) 
 669             assert res 
& 0xf0000000 == 0 
 673         def s32(reader
=None): 
 675             if v 
& 0x80000000 != 0: 
 676                 v 
= - ((v ^ 
0xffffffff) + 1) 
 679         def read_string(reader
=None): 
 683             resb 
= reader
.read(slen
) 
 684             assert len(resb
) == slen
 
 685             return resb
.decode('utf-8') 
 687         def read_bytes(count
, reader
=None): 
 690             resb 
= reader
.read(count
) 
 691             assert len(resb
) == count
 
 694         def read_byte(reader
=None): 
 695             resb 
= read_bytes(1, reader
=reader
) 
 696             res 
= struct
.unpack('<B', resb
)[0] 
 699         # minor_version + major_version 
 704         for _c 
in range(1, int_count
): 
 707         for _c 
in range(1, uint_count
): 
 710         read_bytes((double_count
-1) * 8) 
 712         constant_strings 
= [u
''] 
 713         for _c 
in range(1, string_count
): 
 715             constant_strings
.append(s
) 
 716         namespace_count 
= u30() 
 717         for _c 
in range(1, namespace_count
): 
 721         for _c 
in range(1, ns_set_count
): 
 723             for _c2 
in range(count
): 
 725         multiname_count 
= u30() 
 734             0x0e: 2,  # MultinameA 
 735             0x1b: 1,  # MultinameL 
 736             0x1c: 1,  # MultinameLA 
 739         for _c 
in range(1, multiname_count
): 
 741             assert kind 
in MULTINAME_SIZES
, u
'Invalid multiname kind %r' % kind
 
 743                 u30()  # namespace_idx 
 745                 multinames
.append(constant_strings
[name_idx
]) 
 747                 multinames
.append('[MULTINAME kind: %d]' % kind
) 
 748                 for _c2 
in range(MULTINAME_SIZES
[kind
]): 
 753         MethodInfo 
= collections
.namedtuple( 
 755             ['NEED_ARGUMENTS', 'NEED_REST']) 
 757         for method_id 
in range(method_count
): 
 760             for _ 
in range(param_count
): 
 762             u30()  # name index (always 0 for youtube) 
 764             if flags 
& 0x08 != 0: 
 767                 for c 
in range(option_count
): 
 770             if flags 
& 0x80 != 0: 
 771                 # Param names present 
 772                 for _ 
in range(param_count
): 
 774             mi 
= MethodInfo(flags 
& 0x01 != 0, flags 
& 0x04 != 0) 
 775             method_infos
.append(mi
) 
 778         metadata_count 
= u30() 
 779         for _c 
in range(metadata_count
): 
 782             for _c2 
in range(item_count
): 
 786         def parse_traits_info(): 
 787             trait_name_idx 
= u30() 
 788             kind_full 
= read_byte() 
 789             kind 
= kind_full 
& 0x0f 
 790             attrs 
= kind_full 
>> 4 
 792             if kind 
in [0x00, 0x06]:  # Slot or Const 
 794                 u30()  # type_name_idx 
 798             elif kind 
in [0x01, 0x02, 0x03]:  # Method / Getter / Setter 
 801                 methods
[multinames
[trait_name_idx
]] = method_idx
 
 802             elif kind 
== 0x04:  # Class 
 805             elif kind 
== 0x05:  # Function 
 808                 methods
[function_idx
] = multinames
[trait_name_idx
] 
 810                 raise ExtractorError(u
'Unsupported trait kind %d' % kind
) 
 812             if attrs 
& 0x4 != 0:  # Metadata present 
 813                 metadata_count 
= u30() 
 814                 for _c3 
in range(metadata_count
): 
 815                     u30()  # metadata index 
 820         TARGET_CLASSNAME 
= u
'SignatureDecipher' 
 821         searched_idx 
= multinames
.index(TARGET_CLASSNAME
) 
 822         searched_class_id 
= None 
 824         for class_id 
in range(class_count
): 
 826             if name_idx 
== searched_idx
: 
 827                 # We found the class we're looking for! 
 828                 searched_class_id 
= class_id
 
 829             u30()  # super_name idx 
 831             if flags 
& 0x08 != 0:  # Protected namespace is present 
 832                 u30()  # protected_ns_idx 
 834             for _c2 
in range(intrf_count
): 
 838             for _c2 
in range(trait_count
): 
 841         if searched_class_id 
is None: 
 842             raise ExtractorError(u
'Target class %r not found' % 
 847         for class_id 
in range(class_count
): 
 850             for _c2 
in range(trait_count
): 
 851                 trait_methods 
= parse_traits_info() 
 852                 if class_id 
== searched_class_id
: 
 853                     method_names
.update(trait_methods
.items()) 
 854                     method_idxs
.update(dict( 
 856                         for name
, idx 
in trait_methods
.items())) 
 860         for _c 
in range(script_count
): 
 863             for _c2 
in range(trait_count
): 
 867         method_body_count 
= u30() 
 868         Method 
= collections
.namedtuple('Method', ['code', 'local_count']) 
 870         for _c 
in range(method_body_count
): 
 874             u30()  # init_scope_depth 
 875             u30()  # max_scope_depth 
 877             code 
= read_bytes(code_length
) 
 878             if method_idx 
in method_idxs
: 
 879                 m 
= Method(code
, local_count
) 
 880                 methods
[method_idxs
[method_idx
]] = m
 
 881             exception_count 
= u30() 
 882             for _c2 
in range(exception_count
): 
 889             for _c2 
in range(trait_count
): 
 892         assert p 
+ code_reader
.tell() == len(code_tag
) 
 893         assert len(methods
) == len(method_idxs
) 
 895         method_pyfunctions 
= {} 
 897         def extract_function(func_name
): 
 898             if func_name 
in method_pyfunctions
: 
 899                 return method_pyfunctions
[func_name
] 
 900             if func_name 
not in methods
: 
 901                 raise ExtractorError(u
'Cannot find function %r' % func_name
) 
 902             m 
= methods
[func_name
] 
 905                 registers 
= ['(this)'] + list(args
) + [None] * m
.local_count
 
 907                 coder 
= io
.BytesIO(m
.code
) 
 909                     opcode 
= struct
.unpack('!B', coder
.read(1))[0] 
 910                     if opcode 
== 36:  # pushbyte 
 911                         v 
= struct
.unpack('!B', coder
.read(1))[0] 
 913                     elif opcode 
== 44:  # pushstring 
 915                         stack
.append(constant_strings
[idx
]) 
 916                     elif opcode 
== 48:  # pushscope 
 917                         # We don't implement the scope register, so we'll just 
 918                         # ignore the popped value 
 920                     elif opcode 
== 70:  # callproperty 
 922                         mname 
= multinames
[index
] 
 923                         arg_count 
= u30(coder
) 
 924                         args 
= list(reversed( 
 925                             [stack
.pop() for _ 
in range(arg_count
)])) 
 927                         if mname 
== u
'split': 
 928                             assert len(args
) == 1 
 929                             assert isinstance(args
[0], compat_str
) 
 930                             assert isinstance(obj
, compat_str
) 
 934                                 res 
= obj
.split(args
[0]) 
 936                         elif mname 
== u
'slice': 
 937                             assert len(args
) == 1 
 938                             assert isinstance(args
[0], int) 
 939                             assert isinstance(obj
, list) 
 942                         elif mname 
== u
'join': 
 943                             assert len(args
) == 1 
 944                             assert isinstance(args
[0], compat_str
) 
 945                             assert isinstance(obj
, list) 
 946                             res 
= args
[0].join(obj
) 
 948                         elif mname 
in method_pyfunctions
: 
 949                             stack
.append(method_pyfunctions
[mname
](args
)) 
 951                             raise NotImplementedError( 
 952                                 u
'Unsupported property %r on %r' 
 954                     elif opcode 
== 72:  # returnvalue 
 957                     elif opcode 
== 79:  # callpropvoid 
 959                         mname 
= multinames
[index
] 
 960                         arg_count 
= u30(coder
) 
 961                         args 
= list(reversed( 
 962                             [stack
.pop() for _ 
in range(arg_count
)])) 
 964                         if mname 
== u
'reverse': 
 965                             assert isinstance(obj
, list) 
 968                             raise NotImplementedError( 
 969                                 u
'Unsupported (void) property %r on %r' 
 971                     elif opcode 
== 93:  # findpropstrict 
 973                         mname 
= multinames
[index
] 
 974                         res 
= extract_function(mname
) 
 976                     elif opcode 
== 97:  # setproperty 
 981                         assert isinstance(obj
, list) 
 982                         assert isinstance(idx
, int) 
 984                     elif opcode 
== 98:  # getlocal 
 986                         stack
.append(registers
[index
]) 
 987                     elif opcode 
== 99:  # setlocal 
 990                         registers
[index
] = value
 
 991                     elif opcode 
== 102:  # getproperty 
 993                         pname 
= multinames
[index
] 
 994                         if pname 
== u
'length': 
 996                             assert isinstance(obj
, list) 
 997                             stack
.append(len(obj
)) 
 998                         else:  # Assume attribute access 
1000                             assert isinstance(idx
, int) 
1002                             assert isinstance(obj
, list) 
1003                             stack
.append(obj
[idx
]) 
1004                     elif opcode 
== 128:  # coerce 
1006                     elif opcode 
== 133:  # coerce_s 
1007                         assert isinstance(stack
[-1], (type(None), compat_str
)) 
1008                     elif opcode 
== 164:  # modulo 
1009                         value2 
= stack
.pop() 
1010                         value1 
= stack
.pop() 
1011                         res 
= value1 
% value2
 
1013                     elif opcode 
== 208:  # getlocal_0 
1014                         stack
.append(registers
[0]) 
1015                     elif opcode 
== 209:  # getlocal_1 
1016                         stack
.append(registers
[1]) 
1017                     elif opcode 
== 210:  # getlocal_2 
1018                         stack
.append(registers
[2]) 
1019                     elif opcode 
== 211:  # getlocal_3 
1020                         stack
.append(registers
[3]) 
1021                     elif opcode 
== 214:  # setlocal_2 
1022                         registers
[2] = stack
.pop() 
1023                     elif opcode 
== 215:  # setlocal_3 
1024                         registers
[3] = stack
.pop() 
1026                         raise NotImplementedError( 
1027                             u
'Unsupported opcode %d' % opcode
) 
1029             method_pyfunctions
[func_name
] = resfunc
 
1032         initial_function 
= extract_function(u
'decipher') 
1033         return lambda s
: initial_function([s
]) 
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    Tries the dynamic approach first: extract the deciphering function
    from the referenced player (cached per (player_url, len(s)) in
    self._player_cache) and apply it.  On any failure it warns and falls
    back to the hard-coded static algorithm.

    NOTE(review): reconstructed from a corrupted source; the try/except
    boundaries follow the surviving indentation — confirm against
    upstream history.
    """
    if player_url is not None:
        try:
            # Cache key includes len(s): the same player uses different
            # scramblings for different signature lengths.
            player_id = (player_url, len(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, len(s)
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, len(s))
            return func(s)
        except Exception:
            # Best-effort: report the traceback but keep going with the
            # static fallback instead of aborting the download.
            tb = traceback.format_exc()
            self._downloader.report_warning(
                u'Automatic signature extraction failed: ' + tb)

        self._downloader.report_warning(
            u'Warning: Falling back to static signature algorithm')

    return self._static_decrypt_signature(
        s, video_id, player_url, age_gate)
1061     def _static_decrypt_signature(self
, s
, video_id
, player_url
, age_gate
): 
1063             # The videos with age protection use another player, so the 
1064             # algorithms can be different. 
1066                 return s
[2:63] + s
[82] + s
[64:82] + s
[63] 
1069             return s
[86:29:-1] + s
[88] + s
[28:5:-1] 
1071             return s
[25] + s
[3:25] + s
[0] + s
[26:42] + s
[79] + s
[43:79] + s
[91] + s
[80:83] 
1073             return s
[84:27:-1] + s
[86] + s
[26:5:-1] 
1075             return s
[25] + s
[3:25] + s
[2] + s
[26:40] + s
[77] + s
[41:77] + s
[89] + s
[78:81] 
1077             return s
[84:78:-1] + s
[87] + s
[77:60:-1] + s
[0] + s
[59:3:-1] 
1079             return s
[7:28] + s
[87] + s
[29:45] + s
[55] + s
[46:55] + s
[2] + s
[56:87] + s
[28] 
1081             return s
[6:27] + s
[4] + s
[28:39] + s
[27] + s
[40:59] + s
[2] + s
[60:] 
1083             return s
[80:72:-1] + s
[16] + s
[71:39:-1] + s
[72] + s
[38:16:-1] + s
[82] + s
[15::-1] 
1085             return s
[3:11] + s
[0] + s
[12:55] + s
[84] + s
[56:84] 
1087             return s
[78:70:-1] + s
[14] + s
[69:37:-1] + s
[70] + s
[36:14:-1] + s
[80] + s
[:14][::-1] 
1089             return s
[80:63:-1] + s
[0] + s
[62:0:-1] + s
[63] 
1091             return s
[80:37:-1] + s
[7] + s
[36:7:-1] + s
[0] + s
[6:0:-1] + s
[37] 
1093             return s
[56] + s
[79:56:-1] + s
[41] + s
[55:41:-1] + s
[80] + s
[40:34:-1] + s
[0] + s
[33:29:-1] + s
[34] + s
[28:9:-1] + s
[29] + s
[8:0:-1] + s
[9] 
1095             return s
[1:19] + s
[0] + s
[20:68] + s
[19] + s
[69:80] 
1097             return s
[54] + s
[77:54:-1] + s
[39] + s
[53:39:-1] + s
[78] + s
[38:34:-1] + s
[0] + s
[33:29:-1] + s
[34] + s
[28:9:-1] + s
[29] + s
[8:0:-1] + s
[9] 
1100             raise ExtractorError(u
'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s
))) 
def _get_available_subtitles(self, video_id):
    """Return {lang_code: timedtext_url} for the video's manual subtitles.

    Downloads the subtitle track list from video.google.com and builds
    one api/timedtext URL per language, using the format requested via
    the downloader's 'subtitlesformat' option.  Returns an empty dict
    (after a warning) when the list cannot be fetched or is empty.

    NOTE(review): the try/else scaffolding and the per-track loop were
    missing from the corrupted source and were reconstructed — confirm
    against upstream history.
    """
    try:
        sub_list = self._download_webpage(
            'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        return {}
    lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)

    sub_lang_list = {}
    for l in lang_list:
        lang = l[1]
        params = compat_urllib_parse.urlencode({
            'lang': lang,
            'v': video_id,
            'fmt': self._downloader.params.get('subtitlesformat'),
            'name': l[0].encode('utf-8'),
        })
        url = u'http://www.youtube.com/api/timedtext?' + params
        sub_lang_list[lang] = url
    if not sub_lang_list:
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}
    return sub_lang_list
def _get_available_automatic_caption(self, video_id, webpage):
    """We need the webpage for getting the captions url, pass it as an
       argument to speed up the process.

    Returns {lang_code: caption_url} for automatic (ASR) captions,
    translated into each available target language.  Emits a warning and
    returns an empty dict when the player config, the ttsurl, or the ASR
    track cannot be found.

    NOTE(review): control-flow headers (the ``mobj is None`` guard, the
    ``try:`` and the early returns) were missing from the corrupted
    source and were reconstructed — confirm against upstream history.
    """
    sub_format = self._downloader.params.get('subtitlesformat')
    self.to_screen(u'%s: Looking for automatic captions' % video_id)
    mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
    err_msg = u'Couldn\'t find automatic captions for %s' % video_id
    if mobj is None:
        self._downloader.report_warning(err_msg)
        return {}
    player_config = json.loads(mobj.group(1))
    try:
        args = player_config[u'args']
        caption_url = args[u'ttsurl']
        timestamp = args[u'timestamp']
        # We get the available subtitles
        list_params = compat_urllib_parse.urlencode({
            'type': 'list',
            'tlangs': 1,
            'asrs': 1,
        })
        list_url = caption_url + '&' + list_params
        list_page = self._download_webpage(list_url, video_id)
        caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
        original_lang_node = caption_list.find('track')
        # Only the ASR ("automatic speech recognition") track counts as
        # automatic captions.
        if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
            self._downloader.report_warning(u'Video doesn\'t have automatic captions')
            return {}
        original_lang = original_lang_node.attrib['lang_code']

        sub_lang_list = {}
        # One translated caption URL per available target language.
        for lang_node in caption_list.findall('target'):
            sub_lang = lang_node.attrib['lang_code']
            params = compat_urllib_parse.urlencode({
                'lang': original_lang,
                'tlang': sub_lang,
                'fmt': sub_format,
                'ts': timestamp,
                'kind': 'asr',
            })
            sub_lang_list[sub_lang] = caption_url + '&' + params
        return sub_lang_list
    # An extractor error can be raise by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
def _print_formats(self, formats):
    """Print a human-readable table of the given itag format codes.

    Each line shows the itag, its container extension (default 'flv'),
    its dimensions (default '???'), and a parenthesised special-itag
    note when one exists.
    """
    print('Available formats:')
    for x in formats:
        print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                    self._video_dimensions.get(x, '???'),
                                    ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
def _extract_id(self, url):
    """Extract and return the video id from a watch URL.

    Matches ``url`` against the extractor's _VALID_URL (verbose regex);
    the video id is capture group 2.  Raises ExtractorError for URLs
    that do not match.
    """
    mobj = re.match(self._VALID_URL, url, re.VERBOSE)
    if mobj is None:
        raise ExtractorError(u'Invalid URL: %s' % url)
    video_id = mobj.group(2)
    return video_id
def _get_video_url_list(self, url_map):
    """
    Transform a dictionary in the format {itag:url} to a list of (itag, url)
    with the requested formats.

    Honours the downloader options 'format', 'format_limit',
    'prefer_free_formats' and 'listformats'.  Returns None after
    printing when 'listformats' is set.  Raises ExtractorError when no
    known format is available or the requested format is missing.

    NOTE(review): branch headers and loop ``break`` statements were
    missing from the corrupted source and were reconstructed — confirm
    against upstream history.
    """
    req_format = self._downloader.params.get('format', None)
    format_limit = self._downloader.params.get('format_limit', None)
    available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
    if format_limit is not None and format_limit in available_formats:
        # Cut the preference list down to formats at or below the limit.
        format_list = available_formats[available_formats.index(format_limit):]
    else:
        format_list = available_formats
    existing_formats = [x for x in format_list if x in url_map]
    if len(existing_formats) == 0:
        raise ExtractorError(u'no known formats available for video')
    if self._downloader.params.get('listformats', None):
        self._print_formats(existing_formats)
        return
    if req_format is None or req_format == 'best':
        video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
    elif req_format == 'worst':
        video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
    elif req_format in ('-1', 'all'):
        video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
    else:
        # Specific formats. We pick the first in a slash-delimeted sequence.
        # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
        # available in the specified format. For example,
        # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
        # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
        # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
        req_formats = req_format.split('/')
        video_url_list = None
        for rf in req_formats:
            if rf in url_map:
                video_url_list = [(rf, url_map[rf])]
                break
            if rf in self._video_formats_map:
                # rf names a container (e.g. 'mp4'): pick the best itag
                # of that container that is actually available.
                for srf in self._video_formats_map[rf]:
                    if srf in url_map:
                        video_url_list = [(srf, url_map[srf])]
                        break
                else:
                    continue
                break
        if video_url_list is None:
            raise ExtractorError(u'requested format not available')
    return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
    """Download an m3u8 manifest and return {itag: format_url}.

    Non-comment lines of the manifest are the per-format URLs; the itag
    is parsed out of each URL's 'itag/<n>/' path segment.
    """
    url_map = {}

    def _get_urls(_manifest):
        # Every non-empty line that is not a '#' comment is a format URL.
        lines = _manifest.split('\n')
        urls = filter(lambda l: l and not l.startswith('#'),
                        lines)
        return urls

    manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
    formats_urls = _get_urls(manifest)
    for format_url in formats_urls:
        itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
        url_map[itag] = format_url
    return url_map
def _extract_annotations(self, video_id):
    """Download and return the raw annotations XML for a video."""
    url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
def _real_extract(self, url):
    """Extract all metadata and format URLs for a single YouTube video.

    Returns a list of info dicts, one per selected format.  The flow is:
    resolve redirect URLs, fetch the watch page, fetch get_video_info
    (with an age-gate bypass when needed), extract metadata (uploader,
    title, thumbnail, date, description, subtitles, duration,
    annotations), decrypt signatures where necessary, and build the
    final format list.

    NOTE(review): this body was reconstructed from a corrupted source;
    several ``if``/``else``/``try`` headers and the final result
    assembly were missing and were restored from the surviving
    indentation and line numbering — confirm against upstream history.
    """
    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
    video_id = self._extract_id(url)

    # Get video webpage
    self.report_video_webpage_download(video_id)
    url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
    request = compat_urllib_request.Request(url)
    try:
        video_webpage_bytes = compat_urllib_request.urlopen(request).read()
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))

    video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        # Unescape the backslash-escaped URL from the JS config.
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    # Get video info
    self.report_video_info_webpage_download(video_id)
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        self.report_age_confirmation()
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        data = compat_urllib_parse.urlencode({'video_id': video_id,
                                              'el': 'embedded',
                                              'gl': 'US',
                                              'hl': 'en',
                                              'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                                              'asv': 3,
                                              'sts': '1588',
                                              })
        video_info_url = 'https://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(video_info_url, video_id,
                                note=False,
                                errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
    else:
        age_gate = False
        # Try several 'el' variants until one yields a token.
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    note=False,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            if 'token' in video_info:
                break
    if 'token' not in video_info:
        if 'reason' in video_info:
            raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
        else:
            raise ExtractorError(u'"token" parameter not in video info for unknown reason')

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError(u'"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError(u'Unable to extract uploader name')
    video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group(1)
    else:
        self._downloader.report_warning(u'unable to extract uploader nickname')

    # title
    if 'title' in video_info:
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
    else:
        self._downloader.report_warning(u'Unable to extract video title')
        video_title = u'_'

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning(u'unable to extract video thumbnail')
        video_thumbnail = None
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = None
    mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
    if mobj is not None:
        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        video_description = clean_html(video_description)
    else:
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = u''

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)

    if self._downloader.params.get('listsubtitles', False):
        self._list_available_subtitles(video_id, video_webpage)
        return

    if 'length_seconds' not in video_info:
        self._downloader.report_warning(u'unable to extract video duration')
        video_duration = ''
    else:
        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

    # Decide which formats to download
    try:
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
        if not mobj:
            raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))
        args = info['args']
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # this signatures are encrypted
        if 'url_encoded_fmt_stream_map' not in args:
            raise ValueError(u'No stream_map present')  # caught below
        m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
        if m_s is not None:
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
        if m_s is not None:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
            else:
                video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
        elif 'adaptive_fmts' in video_info:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
            else:
                video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
    except ValueError:
        pass  # best effort: fall through with whatever video_info holds

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        video_url_list = [(None, video_info['conn'][0])]
    elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
        if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        url_map = {}
        for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' in url_data and 'url' in url_data:
                url = url_data['url'][0]
                if 'sig' in url_data:
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    encrypted_sig = url_data['s'][0]
                    if self._downloader.params.get('verbose'):
                        # Describe which player produced the signature,
                        # for bug reports.
                        if age_gate:
                            if player_url is None:
                                player_version = 'unknown'
                            else:
                                player_version = self._search_regex(
                                    r'-(.+)\.swf$', player_url,
                                    u'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                r'html5player-(.+?)\.js', video_webpage,
                                'html5 player', fatal=False)
                            player_desc = u'html5 player %s' % player_version

                        parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                        self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                            (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))

                    if not age_gate:
                        jsplayer_url_json = self._search_regex(
                            r'"assets":.+?"js":\s*("[^"]+")',
                            video_webpage, u'JS player URL')
                        player_url = json.loads(jsplayer_url_json)

                    signature = self._decrypt_signature(
                        encrypted_sig, video_id, player_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
                url_map[url_data['itag'][0]] = url
        video_url_list = self._get_video_url_list(url_map)
        if not video_url_list:
            return
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        video_url_list = self._get_video_url_list(url_map)
        if not video_url_list:
            return
    else:
        raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    results = []
    for format_param, video_real_url in video_url_list:
        # Extension
        video_extension = self._video_extensions.get(format_param, 'flv')

        video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                          self._video_dimensions.get(format_param, '???'),
                                          ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')

        results.append({
            'id':       video_id,
            'url':      video_real_url,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date':  upload_date,
            'title':    video_title,
            'ext':      video_extension,
            'format':   video_format,
            'thumbnail':    video_thumbnail,
            'description':  video_description,
            'player_url':   player_url,
            'subtitles':    video_subtitles,
            'duration':     video_duration,
            'age_limit':    18 if age_gate else 0,
            'annotations':  video_annotations
        })
    return results
class YoutubePlaylistIE(InfoExtractor):
    """Extract all videos of a YouTube playlist via the gdata API.

    NOTE(review): reconstructed from a corrupted source — the _VALID_URL
    body and _MAX_RESULTS were partially missing; confirm both against
    upstream history.
    """
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    # gdata API page size; also the paging step below.
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
            else:
                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Download playlist videos from API
        videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The gdata API refuses start indices beyond 1000.
                self._downloader.report_warning(u'Max number of results reached')
                break
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))

        # Sort by playlist position, then drop the index.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos of a YouTube channel by scraping its /videos
    pages and the c4_browse_ajax JSON pagination endpoint.

    NOTE(review): reconstructed from a corrupted source — loop headers
    and the initial pagenum/video_ids setup were missing; confirm
    against upstream history.
    """
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker present in a page's HTML when more pages exist.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the unique video ids linked from a channel page, in order."""
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
        pagenum = 1

        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)

                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    """Extract all uploads of a YouTube user via the gdata API.

    NOTE(review): reconstructed from a corrupted source — loop headers
    and the video_ids initialisation were missing; confirm against
    upstream history.
    """
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # gdata caps result pages at 50 entries.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractor, the regex would is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            # Extract video identifiers
            ids_in_page = []
            for entry in response['feed']['entry']:
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Handle 'ytsearch' queries via the gdata jsonc API.

    NOTE(review): reconstructed from a corrupted source — _MAX_RESULTS,
    loop scaffolding and the try header were missing; confirm against
    upstream history.
    """
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Clamp the requested count to what the API says exists.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extract every season playlist of a multi-season YouTube show."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        url_match = re.match(self._VALID_URL, url)
        show_name = url_match.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is exposed as its own playlist link.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            results.append(self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so login is mandatory (see YoutubeBaseInfoExtractor).
    _LOGIN_REQUIRED = True
    # Number of feed entries advanced per AJAX request (restored from
    # upstream youtube-dl -- confirm against the full file).
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Template with one remaining %s slot for the paging offset
        # (used below as `self._FEED_TEMPLATE % paging`).
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            # Renamed so the builtin 'id' is not shadowed.
            feed_entries.extend(self.url_result(video_id, 'Youtube') for video_id in ids)
            # A null 'paging' value marks the last page of the feed.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's subscriptions feed."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's recommended-videos feed."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's "Watch Later" list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'

    # The watch-later list is served by action_load_personal_feed.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the user's favourites page to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The favourites page embeds the id of the playlist that backs it;
        # hand that off to the playlist extractor.
        playlist_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1843 class YoutubeTruncatedURLIE(InfoExtractor
): 
1844     IE_NAME 
= 'youtube:truncated_url' 
1845     IE_DESC 
= False  # Do not list 
1846     _VALID_URL 
= r
'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$' 
1848     def _real_extract(self
, url
): 
1849         raise ExtractorError( 
1850             u
'Did you forget to quote the URL? Remember that & is a meta ' 
1851             u
'character in most shells, so you want to put the URL in quotes, ' 
1853             u
'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\'' 
1854             u
' (or simply  youtube-dl BaW_jenozKc  ).',