14 from .common 
import InfoExtractor
, SearchInfoExtractor
 
  15 from .subtitles 
import SubtitlesInfoExtractor
 
  16 from ..jsinterp 
import JSInterpreter
 
  21     compat_urllib_request
, 
  28     get_element_by_attribute
, 
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # NOTE(review): this chunk is missing several original source lines;
    # gaps are marked inline so surviving statements are not misread.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # URL requested to force the English interface before extraction.
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name used to look up credentials in ~/.netrc.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        # Returns the truthiness of the page download that sets the language.
        return bool(self._download_webpage(
            # NOTE(review): the positional arguments of this call (orig line
            # 50) are missing from this chunk.
            note=u'Setting language', errnote='unable to set language',

    # NOTE(review): the `def _login(self):` line itself is missing from this
    # chunk; the statements below belong to that login routine.
        (username, password) = self._get_login_info()
        # No authentication to be performed
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note=u'Downloading login page',
            errnote=u'unable to fetch login page', fatal=False)
        if login_page is False:
            # NOTE(review): the body of this branch (orig line 67) is missing
            # from this chunk.

        # The GALX hidden field must be echoed back in the login POST.
        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, u'Login GALX parameter')

        # NOTE(review): the dict literal these entries belong to
        # (login_form_strs, see its use below) starts on a line missing
        # from this chunk; several entries are missing too.
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'PersistentCookie': u'yes',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'signIn': u'Sign in',
                u'service': u'youtube',

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            # NOTE(review): the positional arguments of this call (orig line
            # 100) are missing from this chunk.
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            # NOTE(review): the body of this branch (orig line 103) is
            # missing from this chunk.
        # If the login form is still present in the response, the
        # authentication attempt did not succeed.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')

    def _confirm_age(self):
        # Submits the age-verification form so age-gated pages are reachable.
        # NOTE(review): the opener of the age_form dict (orig lines 110-111)
        # is missing from this chunk.
            'action_confirm': 'Confirm',
        req = compat_urllib_request.Request(self._AGE_URL,
            compat_urllib_parse.urlencode(age_form).encode('ascii'))

        self._download_webpage(
            # NOTE(review): the positional arguments of this call (orig line
            # 118) are missing from this chunk.
            note=u'Confirming age', errnote=u'Unable to confirm age')

    def _real_initialize(self):
        # Runs once before extraction: set interface language, then log in.
        # NOTE(review): the branch bodies below (orig lines 124, 126, 128+)
        # are missing from this chunk.
        if self._downloader is None:
        if not self._set_language():
        if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    IE_DESC = u'YouTube.com'
    # NOTE(review): several lines of the verbose pattern below — including
    # the opening group (orig 135), parts of the v=/ID alternatives
    # (orig 150-152, 155) and the closing quotes (orig 159) — are missing
    # from this chunk. No comments are inserted inside the raw string.
    _VALID_URL = r"""(?x)^
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                         |youtu\.be/                                          # just youtu.be/xxxx
                         |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # NOTE(review): the opening of this format table (`_formats = {`,
    # orig line 161) and its closing brace are missing from this chunk.
    # Each entry maps a YouTube itag (as a string) to format metadata.
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},

        # 3D formats (negative preference: only picked on request)
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video-only streams (acodec 'none')
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # DASH m4a audio-only streams (vcodec 'none')
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # DASH webm video-only streams
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # DASH webm audio-only streams
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

        '_rtmp': {'protocol': 'rtmp'},
    # NOTE(review): the opener of this test-case list (`_TESTS = [`) and
    # the dict/brace lines between entries are missing from this chunk;
    # the surviving key/value lines of each test definition follow.
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
                u"categories": [u'Science & Technology'],
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
            u"file":  u"yZIXLfi8CZQ.mp4",
            u"note": u"Embed-only video (#1746)",
                u"upload_date": u"20120608",
                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
                u"uploader": u"SET India",
                u"uploader_id": u"setindia"
            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
            u"file": u"a9LDPn-MO4I.m4a",
            u"note": u"256k DASH audio (format 141) via DASH manifest",
                u"upload_date": "20121002",
                u"uploader_id": "8KVIDEO",
                u"description": "No description available.",
                u"uploader": "8KVIDEO",
                u"title": "UHDTV TEST 8K VIDEO.mp4"
                u"youtube_include_dash_manifest": True,
        # DASH manifest with encrypted signature
            u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
                u'id': u'IB3lcPjvWLA',
                u'title': u'Afrojack - The Spark ft. Spree Wilson',
                u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
                u'uploader': u'AfrojackVEVO',
                u'uploader_id': u'AfrojackVEVO',
                u'upload_date': u'20131011',
                u'youtube_include_dash_manifest': True,
 323     def suitable(cls
, url
): 
 324         """Receives a URL and returns True if suitable for this IE.""" 
 325         if YoutubePlaylistIE
.suitable(url
): return False 
 326         return re
.match(cls
._VALID
_URL
, url
) is not None 
 328     def __init__(self
, *args
, **kwargs
): 
 329         super(YoutubeIE
, self
).__init
__(*args
, **kwargs
) 
 330         self
._player
_cache 
= {} 
 332     def report_video_info_webpage_download(self
, video_id
): 
 333         """Report attempt to download video info webpage.""" 
 334         self
.to_screen(u
'%s: Downloading video info webpage' % video_id
) 
 336     def report_information_extraction(self
, video_id
): 
 337         """Report attempt to extract video information.""" 
 338         self
.to_screen(u
'%s: Extracting video information' % video_id
) 
 340     def report_unavailable_format(self
, video_id
, format
): 
 341         """Report extracted video URL.""" 
 342         self
.to_screen(u
'%s: Format %s not available' % (video_id
, format
)) 
 344     def report_rtmp_download(self
): 
 345         """Indicate the download will use the RTMP protocol.""" 
 346         self
.to_screen(u
'RTMP download detected') 
    def _extract_signature_function(self, video_id, player_url, slen):
        # Builds (and caches on disk) a Python callable that deciphers an
        # encrypted signature of length `slen` for the given player URL.
        # NOTE(review): several original lines are missing from this chunk;
        # gaps are marked inline.
        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
            # NOTE(review): the subject argument of re.match (orig line 350)
            # is missing from this chunk.
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%d' % (player_type, player_id, slen)
        # The cache key doubles as a filename component, so it must not
        # contain directory separators.
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)

        cache_enabled = cache_dir is not None
            cache_fn = os.path.join(os.path.expanduser(cache_dir),
                # NOTE(review): the remaining path components and the try
                # statement around the read below are missing from this chunk.
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    cache_spec = json.load(cachef)
                # The cached spec is an index list: output character i of the
                # deciphered signature is taken from position cache_spec[i].
                return lambda s: u''.join(s[i] for i in cache_spec)
                pass  # No cache available

        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=u'Downloading %s player %s' % (player_type, player_id),
                errnote=u'Download of %s failed' % player_url)
            # NOTE(review): the line assigning `code` from `urlh` (orig 382)
            # is missing from this chunk.
            res = self._parse_sig_swf(code)
            # NOTE(review): the `else:` that introduces this assert is
            # missing from this chunk.
            assert False, 'Invalid player type %r' % player_type

                # Probe the deciphering function with one distinct character
                # per position so its behavior can be recorded as an index
                # permutation for the on-disk cache.
                test_string = u''.join(map(compat_chr, range(slen)))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                    os.makedirs(os.path.dirname(cache_fn))
                except OSError as ose:
                    # Only "directory already exists" is acceptable here.
                    if ose.errno != errno.EEXIST:
                write_json_file(cache_spec, cache_fn)
                # Cache-write failures are non-fatal: warn and carry on.
                tb = traceback.format_exc()
                self._downloader.report_warning(
                    u'Writing cache to %r failed: %s' % (cache_fn, tb))
    def _print_sig_code(self, func, slen):
        """Print Python source equivalent to the deciphering function
        `func` for signatures of length `slen` (debugging aid).

        NOTE(review): several original lines of the slice-detection loop
        are missing from this chunk; gaps are marked inline.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression, omitting default parts.
                starts = u'' if start == 0 else str(start)
                ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
                steps = u'' if step == 1 else (u':%d' % step)
                return u's[%s%s%s]' % (starts, ends, steps)

            # NOTE(review): the initialization preceding this line (orig
            # 412-413) is missing from this chunk.
            start = '(Never used)'  # Quelch pyflakes warnings - start will be
                                    # set as soon as step is set
            for i, prev in zip(idxs[1:], idxs[:-1]):
                # NOTE(review): the conditions guarding the yields below
                # (orig 417-419, 421-427, 429-431) are missing from this
                # chunk.
                    yield _genslice(start, prev, step)
                if i - prev in [-1, 1]:
                    yield u's[%d]' % prev
                yield _genslice(start, i, step)

        # Probe with a distinct character per position so the permutation
        # can be read back via ord() of each output character.
        test_string = u''.join(map(compat_chr, range(slen)))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
        self.to_screen(u'Extracted signature function:\n' + code)
 441     def _parse_sig_js(self
, jscode
): 
 442         funcname 
= self
._search
_regex
( 
 443             r
'signature=([a-zA-Z]+)', jscode
, 
 444              u
'Initial JS player signature function name') 
 446         jsi 
= JSInterpreter(jscode
) 
 447         initial_function 
= jsi
.extract_function(funcname
) 
 448         return lambda s
: initial_function([s
]) 
    def _parse_sig_swf(self, file_contents):
        """Parse an SWF (ActionScript 3) player binary and return a
        callable that deciphers a signature: s -> deciphered string.

        NOTE(review): very many original lines are missing from this
        chunk (loop/branch heads, assignments, return statements); the
        surviving lines are reproduced with their original indentation
        and the largest gaps are marked inline.
        """
        # SWF magic is 'FWS' (uncompressed) or 'CWS' (zlib-compressed):
        # bytes 1-2 are always 'WS'.
        if file_contents[1:3] != b'WS':
            raise ExtractorError(
                u'Not an SWF file; header is %r' % file_contents[:3])
        if file_contents[:1] == b'C':
            # Compressed body starts after the 8-byte header.
            content = zlib.decompress(file_contents[8:])
            # NOTE(review): the branch between here and this raise
            # (orig 456) is missing from this chunk.
            raise NotImplementedError(u'Unsupported compression format %r' %

        def extract_tags(content):
            # Iterate over SWF tags: a 16-bit header packs the tag code
            # (upper 10 bits) and a short length (lower 6 bits); length
            # 0x3f means a 32-bit extended length follows.
            while pos < len(content):
                header16 = struct.unpack('<H', content[pos:pos+2])[0]
                tag_code = header16 >> 6
                tag_len = header16 & 0x3f
                    tag_len = struct.unpack('<I', content[pos:pos+4])[0]
                assert pos+tag_len <= len(content)
                yield (tag_code, content[pos:pos+tag_len])

        # NOTE(review): the expression selecting the code tag (orig
        # 473-474, 476) is missing from this chunk.
                        for tag_code, tag in extract_tags(content)
        # Skip the DoABC flags/name prefix: the bytecode starts after the
        # NUL-terminated name.
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)
        def read_int(reader=None):
            # Variable-length u32: 7 data bits per byte, high bit = more.
            # NOTE(review): the loop head and termination (orig 482-488,
            # 491-494) are missing from this chunk.
                b = struct.unpack('<B', buf)[0]
                res = res | ((b & 0x7f) << shift)

        def u30(reader=None):
            # Unsigned 30-bit int: top nibble of the u32 must be clear.
            res = read_int(reader)
            assert res & 0xf0000000 == 0

        def s32(reader=None):
            # Signed 32-bit: reinterpret the two's-complement u32.
            if v & 0x80000000 != 0:
                v = - ((v ^ 0xffffffff) + 1)

        def read_string(reader=None):
            # Length-prefixed UTF-8 string.
            resb = reader.read(slen)
            assert len(resb) == slen
            return resb.decode('utf-8')

        def read_bytes(count, reader=None):
            resb = reader.read(count)
            assert len(resb) == count

        def read_byte(reader=None):
            resb = read_bytes(1, reader=reader)
            res = struct.unpack('<B', resb)[0]

        # minor_version + major_version
        # NOTE(review): the constant-pool headers (int/uint/double counts,
        # orig 529-532, 534-535, 537-538, 540) are missing from this chunk.
        for _c in range(1, int_count):
        for _c in range(1, uint_count):
        read_bytes((double_count-1) * 8)

        # Constant pool index 0 is the implicit empty string.
        constant_strings = [u'']
        for _c in range(1, string_count):
            constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
        for _c in range(1, ns_set_count):
            for _c2 in range(count):
        multiname_count = u30()
        # NOTE(review): the opener of this size table (orig 555-562) is
        # missing from this chunk.
            0x0e: 2,  # MultinameA
            0x1b: 1,  # MultinameL
            0x1c: 1,  # MultinameLA
        for _c in range(1, multiname_count):
            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
                u30()  # namespace_idx
                multinames.append(constant_strings[name_idx])
                multinames.append('[MULTINAME kind: %d]' % kind)
                for _c2 in range(MULTINAME_SIZES[kind]):

        # Method infos: only the two flags we need are retained.
        MethodInfo = collections.namedtuple(
            # NOTE(review): the typename argument (orig 583) is missing
            # from this chunk.
            ['NEED_ARGUMENTS', 'NEED_REST'])
        for method_id in range(method_count):
            for _ in range(param_count):
            u30()  # name index (always 0 for youtube)
            if flags & 0x08 != 0:
                for c in range(option_count):
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata entries are parsed only to advance the reader.
        metadata_count = u30()
        for _c in range(metadata_count):
            for _c2 in range(item_count):

        def parse_traits_info():
            # Low nibble = trait kind; high nibble = attribute flags.
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # type_name_idx
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                methods[multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
            elif kind == 0x05:  # Function
                methods[function_idx] = multinames[trait_name_idx]
                raise ExtractorError(u'Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

        # Locate the class that implements signature deciphering.
        TARGET_CLASSNAME = u'SignatureDecipher'
        searched_idx = multinames.index(TARGET_CLASSNAME)
        searched_class_id = None
        for class_id in range(class_count):
            if name_idx == searched_idx:
                # We found the class we're looking for!
                searched_class_id = class_id
            u30()  # super_name idx
            if flags & 0x08 != 0:  # Protected namespace is present
                u30()  # protected_ns_idx
            for _c2 in range(intrf_count):
            for _c2 in range(trait_count):

        if searched_class_id is None:
            raise ExtractorError(u'Target class %r not found' %

        # Second pass: collect method names/indices of the target class.
        for class_id in range(class_count):
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                if class_id == searched_class_id:
                    method_names.update(trait_methods.items())
                    method_idxs.update(dict(
                        for name, idx in trait_methods.items()))

        # Script traits are parsed only to advance the reader.
        for _c in range(script_count):
            for _c2 in range(trait_count):

        # Method bodies: keep only code for the methods we care about.
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        for _c in range(method_body_count):
            u30()  # init_scope_depth
            u30()  # max_scope_depth
            code = read_bytes(code_length)
            if method_idx in method_idxs:
                m = Method(code, local_count)
                methods[method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
            for _c2 in range(trait_count):

        assert p + code_reader.tell() == len(code_tag)
        assert len(methods) == len(method_idxs)

        method_pyfunctions = {}

        def extract_function(func_name):
            # Translate one AVM2 method into a Python callable (memoized).
            if func_name in method_pyfunctions:
                return method_pyfunctions[func_name]
            if func_name not in methods:
                raise ExtractorError(u'Cannot find function %r' % func_name)
            m = methods[func_name]

            # NOTE(review): the `def resfunc(args):` line (orig 733) and the
            # interpreter-loop head are missing from this chunk; the opcode
            # dispatch below is that function's body.
                registers = ['(this)'] + list(args) + [None] * m.local_count
                coder = io.BytesIO(m.code)
                    opcode = struct.unpack('!B', coder.read(1))[0]
                    if opcode == 36:  # pushbyte
                        v = struct.unpack('!B', coder.read(1))[0]
                    elif opcode == 44:  # pushstring
                        stack.append(constant_strings[idx])
                    elif opcode == 48:  # pushscope
                        # We don't implement the scope register, so we'll just
                        # ignore the popped value
                    elif opcode == 70:  # callproperty
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        if mname == u'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, compat_str)
                                res = obj.split(args[0])
                        elif mname == u'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            assert isinstance(obj, list)
                        elif mname == u'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            assert isinstance(obj, list)
                            res = args[0].join(obj)
                        elif mname in method_pyfunctions:
                            stack.append(method_pyfunctions[mname](args))
                            raise NotImplementedError(
                                u'Unsupported property %r on %r'
                    elif opcode == 72:  # returnvalue
                    elif opcode == 79:  # callpropvoid
                        mname = multinames[index]
                        arg_count = u30(coder)
                        args = list(reversed(
                            [stack.pop() for _ in range(arg_count)]))
                        if mname == u'reverse':
                            assert isinstance(obj, list)
                            raise NotImplementedError(
                                u'Unsupported (void) property %r on %r'
                    elif opcode == 93:  # findpropstrict
                        mname = multinames[index]
                        res = extract_function(mname)
                    elif opcode == 97:  # setproperty
                        assert isinstance(obj, list)
                        assert isinstance(idx, int)
                    elif opcode == 98:  # getlocal
                        stack.append(registers[index])
                    elif opcode == 99:  # setlocal
                        registers[index] = value
                    elif opcode == 102:  # getproperty
                        pname = multinames[index]
                        if pname == u'length':
                            assert isinstance(obj, list)
                            stack.append(len(obj))
                        else:  # Assume attribute access
                            assert isinstance(idx, int)
                            assert isinstance(obj, list)
                            stack.append(obj[idx])
                    elif opcode == 128:  # coerce
                    elif opcode == 133:  # coerce_s
                        assert isinstance(stack[-1], (type(None), compat_str))
                    elif opcode == 164:  # modulo
                        res = value1 % value2
                    elif opcode == 208:  # getlocal_0
                        stack.append(registers[0])
                    elif opcode == 209:  # getlocal_1
                        stack.append(registers[1])
                    elif opcode == 210:  # getlocal_2
                        stack.append(registers[2])
                    elif opcode == 211:  # getlocal_3
                        stack.append(registers[3])
                    elif opcode == 214:  # setlocal_2
                        registers[2] = stack.pop()
                    elif opcode == 215:  # setlocal_3
                        registers[3] = stack.pop()
                        raise NotImplementedError(
                            u'Unsupported opcode %d' % opcode)

            method_pyfunctions[func_name] = resfunc

        # The SWF entry point for signature deciphering is `decipher`.
        initial_function = extract_function(u'decipher')
        return lambda s: initial_function([s])
 864     def _decrypt_signature(self
, s
, video_id
, player_url
, age_gate
=False): 
 865         """Turn the encrypted s field into a working signature""" 
 867         if player_url 
is not None: 
 868             if player_url
.startswith(u
'//'): 
 869                 player_url 
= u
'https:' + player_url
 
 871                 player_id 
= (player_url
, len(s
)) 
 872                 if player_id 
not in self
._player
_cache
: 
 873                     func 
= self
._extract
_signature
_function
( 
 874                         video_id
, player_url
, len(s
) 
 876                     self
._player
_cache
[player_id
] = func
 
 877                 func 
= self
._player
_cache
[player_id
] 
 878                 if self
._downloader
.params
.get('youtube_print_sig_code'): 
 879                     self
._print
_sig
_code
(func
, len(s
)) 
 882                 tb 
= traceback
.format_exc() 
 883                 self
._downloader
.report_warning( 
 884                     u
'Automatic signature extraction failed: ' + tb
) 
 886             self
._downloader
.report_warning( 
 887                 u
'Warning: Falling back to static signature algorithm') 
 889         return self
._static
_decrypt
_signature
( 
 890             s
, video_id
, player_url
, age_gate
) 
 892     def _static_decrypt_signature(self
, s
, video_id
, player_url
, age_gate
): 
 894             # The videos with age protection use another player, so the 
 895             # algorithms can be different. 
 897                 return s
[2:63] + s
[82] + s
[64:82] + s
[63] 
 900             return s
[86:29:-1] + s
[88] + s
[28:5:-1] 
 902             return s
[25] + s
[3:25] + s
[0] + s
[26:42] + s
[79] + s
[43:79] + s
[91] + s
[80:83] 
 904             return s
[84:27:-1] + s
[86] + s
[26:5:-1] 
 906             return s
[25] + s
[3:25] + s
[2] + s
[26:40] + s
[77] + s
[41:77] + s
[89] + s
[78:81] 
 908             return s
[84:78:-1] + s
[87] + s
[77:60:-1] + s
[0] + s
[59:3:-1] 
 910             return s
[7:28] + s
[87] + s
[29:45] + s
[55] + s
[46:55] + s
[2] + s
[56:87] + s
[28] 
 912             return s
[6:27] + s
[4] + s
[28:39] + s
[27] + s
[40:59] + s
[2] + s
[60:] 
 914             return s
[80:72:-1] + s
[16] + s
[71:39:-1] + s
[72] + s
[38:16:-1] + s
[82] + s
[15::-1] 
 916             return s
[3:11] + s
[0] + s
[12:55] + s
[84] + s
[56:84] 
 918             return s
[78:70:-1] + s
[14] + s
[69:37:-1] + s
[70] + s
[36:14:-1] + s
[80] + s
[:14][::-1] 
 920             return s
[80:63:-1] + s
[0] + s
[62:0:-1] + s
[63] 
 922             return s
[80:37:-1] + s
[7] + s
[36:7:-1] + s
[0] + s
[6:0:-1] + s
[37] 
 924             return s
[56] + s
[79:56:-1] + s
[41] + s
[55:41:-1] + s
[80] + s
[40:34:-1] + s
[0] + s
[33:29:-1] + s
[34] + s
[28:9:-1] + s
[29] + s
[8:0:-1] + s
[9] 
 926             return s
[1:19] + s
[0] + s
[20:68] + s
[19] + s
[69:80] 
 928             return s
[54] + s
[77:54:-1] + s
[39] + s
[53:39:-1] + s
[78] + s
[38:34:-1] + s
[0] + s
[33:29:-1] + s
[34] + s
[28:9:-1] + s
[29] + s
[8:0:-1] + s
[9] 
 931             raise ExtractorError(u
'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s
))) 
 933     def _get_available_subtitles(self
, video_id
, webpage
): 
 935             sub_list 
= self
._download
_webpage
( 
 936                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
, 
 937                 video_id
, note
=False) 
 938         except ExtractorError 
as err
: 
 939             self
._downloader
.report_warning(u
'unable to download video subtitles: %s' % compat_str(err
)) 
 941         lang_list 
= re
.findall(r
'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list
) 
 946             params 
= compat_urllib_parse
.urlencode({ 
 949                 'fmt': self
._downloader
.params
.get('subtitlesformat', 'srt'), 
 950                 'name': unescapeHTML(l
[0]).encode('utf-8'), 
 952             url 
= u
'https://www.youtube.com/api/timedtext?' + params
 
 953             sub_lang_list
[lang
] = url
 
 954         if not sub_lang_list
: 
 955             self
._downloader
.report_warning(u
'video doesn\'t have subtitles') 
 959     def _get_available_automatic_caption(self
, video_id
, webpage
): 
 960         """We need the webpage for getting the captions url, pass it as an 
 961            argument to speed up the process.""" 
 962         sub_format 
= self
._downloader
.params
.get('subtitlesformat', 'srt') 
 963         self
.to_screen(u
'%s: Looking for automatic captions' % video_id
) 
 964         mobj 
= re
.search(r
';ytplayer.config = ({.*?});', webpage
) 
 965         err_msg 
= u
'Couldn\'t find automatic captions for %s' % video_id
 
 967             self
._downloader
.report_warning(err_msg
) 
 969         player_config 
= json
.loads(mobj
.group(1)) 
 971             args 
= player_config
[u
'args'] 
 972             caption_url 
= args
[u
'ttsurl'] 
 973             timestamp 
= args
[u
'timestamp'] 
 974             # We get the available subtitles 
 975             list_params 
= compat_urllib_parse
.urlencode({ 
 980             list_url 
= caption_url 
+ '&' + list_params
 
 981             caption_list 
= self
._download
_xml
(list_url
, video_id
) 
 982             original_lang_node 
= caption_list
.find('track') 
 983             if original_lang_node 
is None or original_lang_node
.attrib
.get('kind') != 'asr' : 
 984                 self
._downloader
.report_warning(u
'Video doesn\'t have automatic captions') 
 986             original_lang 
= original_lang_node
.attrib
['lang_code'] 
 989             for lang_node 
in caption_list
.findall('target'): 
 990                 sub_lang 
= lang_node
.attrib
['lang_code'] 
 991                 params 
= compat_urllib_parse
.urlencode({ 
 992                     'lang': original_lang
, 
 998                 sub_lang_list
[sub_lang
] = caption_url 
+ '&' + params
 
1000         # An extractor error can be raise by the download process if there are 
1001         # no automatic captions but there are subtitles 
1002         except (KeyError, ExtractorError
): 
1003             self
._downloader
.report_warning(err_msg
) 
1007     def extract_id(cls
, url
): 
1008         mobj 
= re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
) 
1010             raise ExtractorError(u
'Invalid URL: %s' % url
) 
1011         video_id 
= mobj
.group(2) 
1014     def _extract_from_m3u8(self
, manifest_url
, video_id
): 
1016         def _get_urls(_manifest
): 
1017             lines 
= _manifest
.split('\n') 
1018             urls 
= filter(lambda l
: l 
and not l
.startswith('#'), 
1021         manifest 
= self
._download
_webpage
(manifest_url
, video_id
, u
'Downloading formats manifest') 
1022         formats_urls 
= _get_urls(manifest
) 
1023         for format_url 
in formats_urls
: 
1024             itag 
= self
._search
_regex
(r
'itag/(\d+?)/', format_url
, 'itag') 
1025             url_map
[itag
] = format_url
 
1028     def _extract_annotations(self
, video_id
): 
1029         url 
= 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
 
1030         return self
._download
_webpage
(url
, video_id
, note
=u
'Searching for annotations.', errnote
=u
'Unable to download video annotations.') 
1032     def _real_extract(self
, url
): 
1034             u
'http' if self
._downloader
.params
.get('prefer_insecure', False) 
1037         # Extract original video URL from URL with redirection, like age verification, using next_url parameter 
1038         mobj 
= re
.search(self
._NEXT
_URL
_RE
, url
) 
1040             url 
= proto 
+ '://www.youtube.com/' + compat_urllib_parse
.unquote(mobj
.group(1)).lstrip('/') 
1041         video_id 
= self
.extract_id(url
) 
1044         url 
= proto 
+ '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
 
1045         video_webpage 
= self
._download
_webpage
(url
, video_id
) 
1047         # Attempt to extract SWF player URL 
1048         mobj 
= re
.search(r
'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
1049         if mobj 
is not None: 
1050             player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
1055         self
.report_video_info_webpage_download(video_id
) 
1056         if re
.search(r
'player-age-gate-content">', video_webpage
) is not None: 
1057             self
.report_age_confirmation() 
1059             # We simulate the access to the video from www.youtube.com/v/{video_id} 
1060             # this can be viewed without login into Youtube 
1061             data 
= compat_urllib_parse
.urlencode({'video_id': video_id
, 
1062                                                   'el': 'player_embedded', 
1065                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id
, 
1069             video_info_url 
= proto 
+ '://www.youtube.com/get_video_info?' + data
 
1070             video_info_webpage 
= self
._download
_webpage
(video_info_url
, video_id
, 
1072                                     errnote
='unable to download video info webpage') 
1073             video_info 
= compat_parse_qs(video_info_webpage
) 
1076             for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
1077                 video_info_url 
= (proto 
+ '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
1078                         % (video_id
, el_type
)) 
1079                 video_info_webpage 
= self
._download
_webpage
(video_info_url
, video_id
, 
1081                                         errnote
='unable to download video info webpage') 
1082                 video_info 
= compat_parse_qs(video_info_webpage
) 
1083                 if 'token' in video_info
: 
1085         if 'token' not in video_info
: 
1086             if 'reason' in video_info
: 
1087                 raise ExtractorError( 
1088                     u
'YouTube said: %s' % video_info
['reason'][0], 
1089                     expected
=True, video_id
=video_id
) 
1091                 raise ExtractorError( 
1092                     u
'"token" parameter not in video info for unknown reason', 
1095         if 'view_count' in video_info
: 
1096             view_count 
= int(video_info
['view_count'][0]) 
1100         # Check for "rental" videos 
1101         if 'ypc_video_rental_bar_text' in video_info 
and 'author' not in video_info
: 
1102             raise ExtractorError(u
'"rental" videos not supported') 
1104         # Start extracting information 
1105         self
.report_information_extraction(video_id
) 
1108         if 'author' not in video_info
: 
1109             raise ExtractorError(u
'Unable to extract uploader name') 
1110         video_uploader 
= compat_urllib_parse
.unquote_plus(video_info
['author'][0]) 
1113         video_uploader_id 
= None 
1114         mobj 
= re
.search(r
'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage
) 
1115         if mobj 
is not None: 
1116             video_uploader_id 
= mobj
.group(1) 
1118             self
._downloader
.report_warning(u
'unable to extract uploader nickname') 
1121         if 'title' in video_info
: 
1122             video_title 
= video_info
['title'][0] 
1124             self
._downloader
.report_warning(u
'Unable to extract video title') 
1128         # We try first to get a high quality image: 
1129         m_thumb 
= re
.search(r
'<span itemprop="thumbnail".*?href="(.*?)">', 
1130                             video_webpage
, re
.DOTALL
) 
1131         if m_thumb 
is not None: 
1132             video_thumbnail 
= m_thumb
.group(1) 
1133         elif 'thumbnail_url' not in video_info
: 
1134             self
._downloader
.report_warning(u
'unable to extract video thumbnail') 
1135             video_thumbnail 
= None 
1136         else:   # don't panic if we can't find it 
1137             video_thumbnail 
= compat_urllib_parse
.unquote_plus(video_info
['thumbnail_url'][0]) 
1141         mobj 
= re
.search(r
'(?s)id="eow-date.*?>(.*?)</span>', video_webpage
) 
1144                 r
'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>', 
1146         if mobj 
is not None: 
1147             upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
1148             upload_date 
= unified_strdate(upload_date
) 
1150         m_cat_container 
= get_element_by_id("eow-category", video_webpage
) 
1152             category 
= self
._html
_search
_regex
( 
1153                 r
'(?s)<a[^<]+>(.*?)</a>', m_cat_container
, 'category', 
1155             video_categories 
= None if category 
is None else [category
] 
1157             video_categories 
= None 
1160         video_description 
= get_element_by_id("eow-description", video_webpage
) 
1161         if video_description
: 
1162             video_description 
= re
.sub(r
'''(?x) 
1164                     (?:[a-zA-Z-]+="[^"]+"\s+)*? 
1166                     (?:[a-zA-Z-]+="[^"]+"\s+)*? 
1167                     class="yt-uix-redirect-link"\s*> 
1170             ''', r
'\1', video_description
) 
1171             video_description 
= clean_html(video_description
) 
1173             fd_mobj 
= re
.search(r
'<meta name="description" content="([^"]+)"', video_webpage
) 
1175                 video_description 
= unescapeHTML(fd_mobj
.group(1)) 
1177                 video_description 
= u
'' 
1179         def _extract_count(klass
): 
1180             count 
= self
._search
_regex
( 
1181                 r
'class="%s">([\d,]+)</span>' % re
.escape(klass
), 
1182                 video_webpage
, klass
, default
=None) 
1183             if count 
is not None: 
1184                 return int(count
.replace(',', '')) 
1186         like_count 
= _extract_count(u
'likes-count') 
1187         dislike_count 
= _extract_count(u
'dislikes-count') 
1190         video_subtitles 
= self
.extract_subtitles(video_id
, video_webpage
) 
1192         if self
._downloader
.params
.get('listsubtitles', False): 
1193             self
._list
_available
_subtitles
(video_id
, video_webpage
) 
1196         if 'length_seconds' not in video_info
: 
1197             self
._downloader
.report_warning(u
'unable to extract video duration') 
1198             video_duration 
= None 
1200             video_duration 
= int(compat_urllib_parse
.unquote_plus(video_info
['length_seconds'][0])) 
1203         video_annotations 
= None 
1204         if self
._downloader
.params
.get('writeannotations', False): 
1205                 video_annotations 
= self
._extract
_annotations
(video_id
) 
1207         # Decide which formats to download 
1209             mobj 
= re
.search(r
';ytplayer\.config\s*=\s*({.*?});', video_webpage
) 
1211                 raise ValueError('Could not find vevo ID') 
1212             json_code 
= uppercase_escape(mobj
.group(1)) 
1213             ytplayer_config 
= json
.loads(json_code
) 
1214             args 
= ytplayer_config
['args'] 
1215             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map 
1216             # this signatures are encrypted 
1217             if 'url_encoded_fmt_stream_map' not in args
: 
1218                 raise ValueError(u
'No stream_map present')  # caught below 
1219             re_signature 
= re
.compile(r
'[&,]s=') 
1220             m_s 
= re_signature
.search(args
['url_encoded_fmt_stream_map']) 
1222                 self
.to_screen(u
'%s: Encrypted signatures detected.' % video_id
) 
1223                 video_info
['url_encoded_fmt_stream_map'] = [args
['url_encoded_fmt_stream_map']] 
1224             m_s 
= re_signature
.search(args
.get('adaptive_fmts', u
'')) 
1226                 if 'adaptive_fmts' in video_info
: 
1227                     video_info
['adaptive_fmts'][0] += ',' + args
['adaptive_fmts'] 
1229                     video_info
['adaptive_fmts'] = [args
['adaptive_fmts']] 
1233         def _map_to_format_list(urlmap
): 
1235             for itag
, video_real_url 
in urlmap
.items(): 
1238                     'url': video_real_url
, 
1239                     'player_url': player_url
, 
1241                 if itag 
in self
._formats
: 
1242                     dct
.update(self
._formats
[itag
]) 
1246         if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
1247             self
.report_rtmp_download() 
1249                 'format_id': '_rtmp', 
1251                 'url': video_info
['conn'][0], 
1252                 'player_url': player_url
, 
1254         elif len(video_info
.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info
.get('adaptive_fmts', [])) >= 1: 
1255             encoded_url_map 
= video_info
.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info
.get('adaptive_fmts',[''])[0] 
1256             if 'rtmpe%3Dyes' in encoded_url_map
: 
1257                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected
=True) 
1259             for url_data_str 
in encoded_url_map
.split(','): 
1260                 url_data 
= compat_parse_qs(url_data_str
) 
1261                 if 'itag' in url_data 
and 'url' in url_data
: 
1262                     url 
= url_data
['url'][0] 
1263                     if 'sig' in url_data
: 
1264                         url 
+= '&signature=' + url_data
['sig'][0] 
1265                     elif 's' in url_data
: 
1266                         encrypted_sig 
= url_data
['s'][0] 
1267                         if self
._downloader
.params
.get('verbose'): 
1269                                 if player_url 
is None: 
1270                                     player_version 
= 'unknown' 
1272                                     player_version 
= self
._search
_regex
( 
1273                                         r
'-(.+)\.swf$', player_url
, 
1274                                         u
'flash player', fatal
=False) 
1275                                 player_desc 
= 'flash player %s' % player_version
 
1277                                 player_version 
= self
._search
_regex
( 
1278                                     r
'html5player-(.+?)\.js', video_webpage
, 
1279                                     'html5 player', fatal
=False) 
1280                                 player_desc 
= u
'html5 player %s' % player_version
 
1282                             parts_sizes 
= u
'.'.join(compat_str(len(part
)) for part 
in encrypted_sig
.split('.')) 
1283                             self
.to_screen(u
'encrypted signature length %d (%s), itag %s, %s' % 
1284                                 (len(encrypted_sig
), parts_sizes
, url_data
['itag'][0], player_desc
)) 
1287                             jsplayer_url_json 
= self
._search
_regex
( 
1288                                 r
'"assets":.+?"js":\s*("[^"]+")', 
1289                                 video_webpage
, u
'JS player URL') 
1290                             player_url 
= json
.loads(jsplayer_url_json
) 
1292                         signature 
= self
._decrypt
_signature
( 
1293                             encrypted_sig
, video_id
, player_url
, age_gate
) 
1294                         url 
+= '&signature=' + signature
 
1295                     if 'ratebypass' not in url
: 
1296                         url 
+= '&ratebypass=yes' 
1297                     url_map
[url_data
['itag'][0]] = url
 
1298             formats 
= _map_to_format_list(url_map
) 
1299         elif video_info
.get('hlsvp'): 
1300             manifest_url 
= video_info
['hlsvp'][0] 
1301             url_map 
= self
._extract
_from
_m
3u8(manifest_url
, video_id
) 
1302             formats 
= _map_to_format_list(url_map
) 
1304             raise ExtractorError(u
'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') 
1306         # Look for the DASH manifest 
1307         if (self
._downloader
.params
.get('youtube_include_dash_manifest', False)): 
1309                 # The DASH manifest used needs to be the one from the original video_webpage. 
1310                 # The one found in get_video_info seems to be using different signatures. 
1311                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. 
1312                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the 
1313                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. 
1315                     dash_manifest_url 
= video_info
.get('dashmpd')[0] 
1317                     dash_manifest_url 
= ytplayer_config
['args']['dashmpd'] 
1318                 def decrypt_sig(mobj
): 
1320                     dec_s 
= self
._decrypt
_signature
(s
, video_id
, player_url
, age_gate
) 
1321                     return '/signature/%s' % dec_s
 
1322                 dash_manifest_url 
= re
.sub(r
'/s/([\w\.]+)', decrypt_sig
, dash_manifest_url
) 
1323                 dash_doc 
= self
._download
_xml
( 
1324                     dash_manifest_url
, video_id
, 
1325                     note
=u
'Downloading DASH manifest', 
1326                     errnote
=u
'Could not download DASH manifest') 
1327                 for r 
in dash_doc
.findall(u
'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): 
1328                     url_el 
= r
.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') 
1331                     format_id 
= r
.attrib
['id'] 
1332                     video_url 
= url_el
.text
 
1333                     filesize 
= int_or_none(url_el
.attrib
.get('{http://youtube.com/yt/2012/10/10}contentLength')) 
1335                         'format_id': format_id
, 
1337                         'width': int_or_none(r
.attrib
.get('width')), 
1338                         'tbr': int_or_none(r
.attrib
.get('bandwidth'), 1000), 
1339                         'asr': int_or_none(r
.attrib
.get('audioSamplingRate')), 
1340                         'filesize': filesize
, 
1343                         existing_format 
= next( 
1344                             fo 
for fo 
in formats
 
1345                             if fo
['format_id'] == format_id
) 
1346                     except StopIteration: 
1347                         f
.update(self
._formats
.get(format_id
, {})) 
1350                         existing_format
.update(f
) 
1352             except (ExtractorError
, KeyError) as e
: 
1353                 self
.report_warning(u
'Skipping DASH manifest: %s' % e
, video_id
) 
1355         self
._sort
_formats
(formats
) 
1359             'uploader':     video_uploader
, 
1360             'uploader_id':  video_uploader_id
, 
1361             'upload_date':  upload_date
, 
1362             'title':        video_title
, 
1363             'thumbnail':    video_thumbnail
, 
1364             'description':  video_description
, 
1365             'categories':   video_categories
, 
1366             'subtitles':    video_subtitles
, 
1367             'duration':     video_duration
, 
1368             'age_limit':    18 if age_gate 
else 0, 
1369             'annotations':  video_annotations
, 
1370             'webpage_url': proto 
+ '://www.youtube.com/watch?v=%s' % video_id
, 
1371             'view_count':   view_count
, 
1372             'like_count': like_count
, 
1373             'dislike_count': dislike_count
, 
1377 class YoutubePlaylistIE(YoutubeBaseInfoExtractor
): 
1378     IE_DESC 
= u
'YouTube.com playlists' 
1379     _VALID_URL 
= r
"""(?x)(?: 
1384                            (?:course|view_play_list|my_playlists|artist|playlist|watch) 
1385                            \? (?:.*?&)*? (?:p|a|list)= 
1389                             (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} 
1390                             # Top tracks, they can also include dots  
1395                         ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) 
1397     _TEMPLATE_URL 
= 'https://www.youtube.com/playlist?list=%s' 
1398     _MORE_PAGES_INDICATOR 
= r
'data-link-type="next"' 
1399     _VIDEO_RE 
= r
'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' 
1400     IE_NAME 
= u
'youtube:playlist' 
1402     def _real_initialize(self
): 
1405     def _ids_to_results(self
, ids
): 
1406         return [self
.url_result(vid_id
, 'Youtube', video_id
=vid_id
) 
1409     def _extract_mix(self
, playlist_id
): 
1410         # The mixes are generated from a a single video 
1411         # the id of the playlist is just 'RD' + video_id 
1412         url 
= 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id
[-11:], playlist_id
) 
1413         webpage 
= self
._download
_webpage
(url
, playlist_id
, u
'Downloading Youtube mix') 
1414         search_title 
= lambda class_name
: get_element_by_attribute('class', class_name
, webpage
) 
1415         title_span 
= (search_title('playlist-title') or 
1416             search_title('title long-title') or search_title('title')) 
1417         title 
= clean_html(title_span
) 
1418         video_re 
= r
'''(?x)data-video-username=".*?".*? 
1419                        href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re
.escape(playlist_id
) 
1420         ids 
= orderedSet(re
.findall(video_re
, webpage
, flags
=re
.DOTALL
)) 
1421         url_results 
= self
._ids
_to
_results
(ids
) 
1423         return self
.playlist_result(url_results
, playlist_id
, title
) 
1425     def _real_extract(self
, url
): 
1426         # Extract playlist id 
1427         mobj 
= re
.match(self
._VALID
_URL
, url
) 
1429             raise ExtractorError(u
'Invalid URL: %s' % url
) 
1430         playlist_id 
= mobj
.group(1) or mobj
.group(2) 
1432         # Check if it's a video-specific URL 
1433         query_dict 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
) 
1434         if 'v' in query_dict
: 
1435             video_id 
= query_dict
['v'][0] 
1436             if self
._downloader
.params
.get('noplaylist'): 
1437                 self
.to_screen(u
'Downloading just video %s because of --no-playlist' % video_id
) 
1438                 return self
.url_result(video_id
, 'Youtube', video_id
=video_id
) 
1440                 self
.to_screen(u
'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id
, video_id
)) 
1442         if playlist_id
.startswith('RD'): 
1443             # Mixes require a custom extraction process 
1444             return self
._extract
_mix
(playlist_id
) 
1445         if playlist_id
.startswith('TL'): 
1446             raise ExtractorError(u
'For downloading YouTube.com top lists, use ' 
1447                 u
'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected
=True) 
1449         url 
= self
._TEMPLATE
_URL 
% playlist_id
 
1450         page 
= self
._download
_webpage
(url
, playlist_id
) 
1451         more_widget_html 
= content_html 
= page
 
1453         # Check if the playlist exists or is private 
1454         if re
.search(r
'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page
) is not None: 
1455             raise ExtractorError( 
1456                 u
'The playlist doesn\'t exist or is private, use --username or ' 
1457                 '--netrc to access it.', 
1460         # Extract the video ids from the playlist pages 
1463         for page_num 
in itertools
.count(1): 
1464             matches 
= re
.finditer(self
._VIDEO
_RE
, content_html
) 
1465             # We remove the duplicates and the link with index 0 
1466             # (it's not the first video of the playlist) 
1467             new_ids 
= orderedSet(m
.group('id') for m 
in matches 
if m
.group('index') != '0') 
1470             mobj 
= re
.search(r
'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html
) 
1474             more 
= self
._download
_json
( 
1475                 'https://youtube.com/%s' % mobj
.group('more'), playlist_id
, 
1476                 'Downloading page #%s' % page_num
, 
1477                 transform_source
=uppercase_escape
) 
1478             content_html 
= more
['content_html'] 
1479             more_widget_html 
= more
['load_more_widget_html'] 
1481         playlist_title 
= self
._html
_search
_regex
( 
1482             r
'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', 
1485         url_results 
= self
._ids
_to
_results
(ids
) 
1486         return self
.playlist_result(url_results
, playlist_id
, playlist_title
) 
1489 class YoutubeTopListIE(YoutubePlaylistIE
): 
1490     IE_NAME 
= u
'youtube:toplist' 
1491     IE_DESC 
= (u
'YouTube.com top lists, "yttoplist:{channel}:{list title}"' 
1492         u
' (Example: "yttoplist:music:Top Tracks")') 
1493     _VALID_URL 
= r
'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' 
1495     def _real_extract(self
, url
): 
1496         mobj 
= re
.match(self
._VALID
_URL
, url
) 
1497         channel 
= mobj
.group('chann') 
1498         title 
= mobj
.group('title') 
1499         query 
= compat_urllib_parse
.urlencode({'title': title
}) 
1500         playlist_re 
= 'href="([^"]+?%s.*?)"' % re
.escape(query
) 
1501         channel_page 
= self
._download
_webpage
('https://www.youtube.com/%s' % channel
, title
) 
1502         link 
= self
._html
_search
_regex
(playlist_re
, channel_page
, u
'list') 
1503         url 
= compat_urlparse
.urljoin('https://www.youtube.com/', link
) 
1505         video_re 
= r
'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' 
1507         # sometimes the webpage doesn't contain the videos 
1508         # retry until we get them 
1509         for i 
in itertools
.count(0): 
1510             msg 
= u
'Downloading Youtube mix' 
1512                 msg 
+= ', retry #%d' % i
 
1513             webpage 
= self
._download
_webpage
(url
, title
, msg
) 
1514             ids 
= orderedSet(re
.findall(video_re
, webpage
)) 
1517         url_results 
= self
._ids
_to
_results
(ids
) 
1518         return self
.playlist_result(url_results
, playlist_title
=title
) 
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos of a YouTube channel as a playlist."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # Marker present in the ajax response while more pages remain.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from *page*, deduplicated while
        preserving the order of first appearance."""
        ids_in_page = []
        seen = set()  # O(1) membership check instead of scanning the list
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group(1)
            if video_id not in seen:
                seen.add(video_id)
                ids_in_page.append(video_id)
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        # Autogenerated channels carry a marker class in their HTML.
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_json(
                    url, channel_id, note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # Stop once the "load more" widget no longer offers a next page.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor):
    """Extract the uploads of a YouTube user via the GData API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50  # maximum result size per GData request
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted by another youtube
        # extractor: this regex is too permissive and would match their URLs.
        other_ies = iter(klass for (name, klass) in globals().items()
                         if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            # GData's start-index parameter is 1-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # No entries left: signals PagedList that paging is done.
                return

            # Extract video identifiers
            for entry in response['feed']['entry']:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor backed by the GData jsonc API ("ytsearch")."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n
        PAGE_SIZE = 50  # maximum allowed by the API

        while (PAGE_SIZE * pagenum) < limit:
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)  # start-index is 1-based
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
                errnote=u'Unable to download API page')
            data = json.loads(data_json)
            api_response = data['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            # List comprehension instead of list(generator): same result,
            # clearer intent.
            new_ids = [video['id'] for video in api_response['items']]
            video_ids += new_ids

            # Re-clamp the limit in case the API has fewer total items
            # than the caller requested.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as the parent search extractor, but newest videos first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = u'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Parent endpoint plus orderby=published.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for youtube.com/results?search_query=... result pages."""
    IE_DESC = u'YouTube.com search URLs'
    IE_NAME = u'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(mobj.group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')

        # Every result is wrapped in an <h3 class="yt-lockup-title"> element.
        part_codes = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
        entries = []
        for part_code in part_codes:
            part_title = self._html_search_regex(
                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            absolute_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet)
            entries.append({
                '_type': 'url',
                'url': absolute_url,
                'title': part_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season YouTube shows."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        results = []
        for season in m_seasons:
            season_url = 'https://www.youtube.com' + season.group(1)
            results.append(self.url_result(season_url, 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so credentials are mandatory.
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Ajax endpoint URL; the escaped '%%s' survives this formatting as a
        # '%s' placeholder for the paging value used in _real_extract.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        # e.g. u'youtube:subscriptions' — derived from the subclass feed name.
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            # The response key holding the HTML differs between feeds, hence
            # the fallback from 'feed_html' to 'content_html'.
            feed_html = info.get('feed_html') or info.get('content_html')
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in ids)
            # The "load more" link carries the paging value for the next
            # request; its absence means we reached the last page.
            mobj = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                feed_html)
            if mobj is None:
                break
            paging = mobj.group('paging')
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed (":ytsubs" keyword)."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Recommended-videos feed (":ytrec" keyword)."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Watch-later list (":ytwatchlater" keyword); a personal feed."""
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _PERSONAL_FEED = True
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed (":ythistory" keyword); a personal feed."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Fixed: this pattern was a plain unicode literal (u'...') while every
    # sibling extractor uses a raw string; use r'...' so '\.' is an explicit
    # regex escape instead of relying on Python passing unknown string
    # escapes through unchanged.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        webpage = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The favourites page embeds the id of the playlist backing it.
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', webpage, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1823 class YoutubeTruncatedURLIE(InfoExtractor
): 
1824     IE_NAME 
= 'youtube:truncated_url' 
1825     IE_DESC 
= False  # Do not list 
1826     _VALID_URL 
= r
'''(?x) 
1827         (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$| 
1828         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ 
1831     def _real_extract(self
, url
): 
1832         raise ExtractorError( 
1833             u
'Did you forget to quote the URL? Remember that & is a meta ' 
1834             u
'character in most shells, so you want to put the URL in quotes, ' 
1836             u
'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' 
1837             u
' or simply  youtube-dl BaW_jenozKc  .',