Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_HTTPError,
  20     compat_kwargs,
  21     compat_parse_qs,
  22     compat_urllib_parse_unquote,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27     compat_str,
  28 )
  29 from ..utils import (
  30     clean_html,
  31     dict_get,
  32     error_to_compat_str,
  33     ExtractorError,
  34     float_or_none,
  35     get_element_by_attribute,
  36     get_element_by_id,
  37     int_or_none,
  38     mimetype2ext,
  39     orderedSet,
  40     parse_codecs,
  41     parse_duration,
  42     qualities,
  43     remove_quotes,
  44     remove_start,
  45     smuggle_url,
  46     str_or_none,
  47     str_to_int,
  48     try_get,
  49     unescapeHTML,
  50     unified_strdate,
  51     unsmuggle_url,
  52     uppercase_escape,
  53     url_or_none,
  54     urlencode_postdata,
  55 )
  56
  57
  58 class YoutubeBaseInfoExtractor(InfoExtractor):
  59     """Provide base functions for Youtube extractors"""
  60     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  61     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  62
  63     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  64     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  65     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  66
  67     _NETRC_MACHINE = 'youtube'
  68     # If True it will raise an error if no login info is provided
  69     _LOGIN_REQUIRED = False
  70
  71     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  72
  73     def _set_language(self):
  74         self._set_cookie(
  75             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  76             # YouTube sets the expire time to about two months
  77             expire_time=time.time() + 2 * 30 * 24 * 3600)
  78
  79     def _ids_to_results(self, ids):
  80         return [
  81             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  82             for vid_id in ids]
  83
  84     def _login(self):
  85         """
  86         Attempt to log in to YouTube.
  87         True is returned if successful or skipped.
  88         False is returned if login failed.
  89
  90         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  91         """
  92         username, password = self._get_login_info()
  93         # No authentication to be performed
  94         if username is None:
  95             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  96                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  97             return True
  98
  99         login_page = self._download_webpage(
 100             self._LOGIN_URL, None,
 101             note='Downloading login page',
 102             errnote='unable to fetch login page', fatal=False)
 103         if login_page is False:
 104             return
 105
 106         login_form = self._hidden_inputs(login_page)
 107
 108         def req(url, f_req, note, errnote):
 109             data = login_form.copy()
 110             data.update({
 111                 'pstMsg': 1,
 112                 'checkConnection': 'youtube',
 113                 'checkedDomains': 'youtube',
 114                 'hl': 'en',
 115                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 116                 'f.req': json.dumps(f_req),
 117                 'flowName': 'GlifWebSignIn',
 118                 'flowEntry': 'ServiceLogin',
 119             })
 120             return self._download_json(
 121                 url, None, note=note, errnote=errnote,
 122                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 123                 fatal=False,
 124                 data=urlencode_postdata(data), headers={
 125                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 126                     'Google-Accounts-XSRF': 1,
 127                 })
 128
 129         def warn(message):
 130             self._downloader.report_warning(message)
 131
 132         lookup_req = [
 133             username,
 134             None, [], None, 'US', None, None, 2, False, True,
 135             [
 136                 None, None,
 137                 [2, 1, None, 1,
 138                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 139                  None, [], 4],
 140                 1, [None, None, []], None, None, None, True
 141             ],
 142             username,
 143         ]
 144
 145         lookup_results = req(
 146             self._LOOKUP_URL, lookup_req,
 147             'Looking up account info', 'Unable to look up account info')
 148
 149         if lookup_results is False:
 150             return False
 151
 152         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 153         if not user_hash:
 154             warn('Unable to extract user hash')
 155             return False
 156
 157         challenge_req = [
 158             user_hash,
 159             None, 1, None, [1, None, None, None, [password, None, True]],
 160             [
 161                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 162                 1, [None, None, []], None, None, None, True
 163             ]]
 164
 165         challenge_results = req(
 166             self._CHALLENGE_URL, challenge_req,
 167             'Logging in', 'Unable to log in')
 168
 169         if challenge_results is False:
 170             return
 171
 172         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 173         if login_res:
 174             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 175             warn(
 176                 'Unable to login: %s' % 'Invalid password'
 177                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 178             return False
 179
 180         res = try_get(challenge_results, lambda x: x[0][-1], list)
 181         if not res:
 182             warn('Unable to extract result entry')
 183             return False
 184
 185         login_challenge = try_get(res, lambda x: x[0][0], list)
 186         if login_challenge:
 187             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 188             if challenge_str == 'TWO_STEP_VERIFICATION':
 189                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 190                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 191                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 192                 if status == 'QUOTA_EXCEEDED':
 193                     warn('Exceeded the limit of TFA codes, try later')
 194                     return False
 195
 196                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 197                 if not tl:
 198                     warn('Unable to extract TL')
 199                     return False
 200
 201                 tfa_code = self._get_tfa_info('2-step verification code')
 202
 203                 if not tfa_code:
 204                     warn(
 205                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 206                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 207                     return False
 208
 209                 tfa_code = remove_start(tfa_code, 'G-')
 210
 211                 tfa_req = [
 212                     user_hash, None, 2, None,
 213                     [
 214                         9, None, None, None, None, None, None, None,
 215                         [None, tfa_code, True, 2]
 216                     ]]
 217
 218                 tfa_results = req(
 219                     self._TFA_URL.format(tl), tfa_req,
 220                     'Submitting TFA code', 'Unable to submit TFA code')
 221
 222                 if tfa_results is False:
 223                     return False
 224
 225                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 226                 if tfa_res:
 227                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 228                     warn(
 229                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 230                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 231                     return False
 232
 233                 check_cookie_url = try_get(
 234                     tfa_results, lambda x: x[0][-1][2], compat_str)
 235             else:
 236                 CHALLENGES = {
 237                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 238                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 239                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 240                 }
 241                 challenge = CHALLENGES.get(
 242                     challenge_str,
 243                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 244                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 245                 return False
 246         else:
 247             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 248
 249         if not check_cookie_url:
 250             warn('Unable to extract CheckCookie URL')
 251             return False
 252
 253         check_cookie_results = self._download_webpage(
 254             check_cookie_url, None, 'Checking cookie', fatal=False)
 255
 256         if check_cookie_results is False:
 257             return False
 258
 259         if 'https://myaccount.google.com/' not in check_cookie_results:
 260             warn('Unable to log in')
 261             return False
 262
 263         return True
 264
 265     def _download_webpage_handle(self, *args, **kwargs):
 266         query = kwargs.get('query', {}).copy()
 267         query['disable_polymer'] = 'true'
 268         kwargs['query'] = query
 269         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 270             *args, **compat_kwargs(kwargs))
 271
 272     def _real_initialize(self):
 273         if self._downloader is None:
 274             return
 275         self._set_language()
 276         if not self._login():
 277             return
 278
 279
 280 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 281     # Extract entries from page with "Load more" button
 282     def _entries(self, page, playlist_id):
 283         more_widget_html = content_html = page
 284         for page_num in itertools.count(1):
 285             for entry in self._process_page(content_html):
 286                 yield entry
 287
 288             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 289             if not mobj:
 290                 break
 291
 292             count = 0
 293             retries = 3
 294             while count <= retries:
 295                 try:
 296                     # Downloading page may result in intermittent 5xx HTTP error
 297                     # that is usually worked around with a retry
 298                     more = self._download_json(
 299                         'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 300                         'Downloading page #%s%s'
 301                         % (page_num, ' (retry #%d)' % count if count else ''),
 302                         transform_source=uppercase_escape)
 303                     break
 304                 except ExtractorError as e:
 305                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
 306                         count += 1
 307                         if count <= retries:
 308                             continue
 309                     raise
 310
 311             content_html = more['content_html']
 312             if not content_html.strip():
 313                 # Some webpages show a "Load more" button but they don't
 314                 # have more videos
 315                 break
 316             more_widget_html = more['load_more_widget_html']
 317
 318
 319 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 320     def _process_page(self, content):
 321         for video_id, video_title in self.extract_videos_from_page(content):
 322             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 323
 324     def extract_videos_from_page(self, page):
 325         ids_in_page = []
 326         titles_in_page = []
 327         for mobj in re.finditer(self._VIDEO_RE, page):
 328             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 329             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 330                 continue
 331             video_id = mobj.group('id')
 332             video_title = unescapeHTML(mobj.group('title'))
 333             if video_title:
 334                 video_title = video_title.strip()
 335             try:
 336                 idx = ids_in_page.index(video_id)
 337                 if video_title and not titles_in_page[idx]:
 338                     titles_in_page[idx] = video_title
 339             except ValueError:
 340                 ids_in_page.append(video_id)
 341                 titles_in_page.append(video_title)
 342         return zip(ids_in_page, titles_in_page)
 343
 344
 345 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 346     def _process_page(self, content):
 347         for playlist_id in orderedSet(re.findall(
 348                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 349                 content)):
 350             yield self.url_result(
 351                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 352
 353     def _real_extract(self, url):
 354         playlist_id = self._match_id(url)
 355         webpage = self._download_webpage(url, playlist_id)
 356         title = self._og_search_title(webpage, fatal=False)
 357         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 358
 359
 360 class YoutubeIE(YoutubeBaseInfoExtractor):
 361     IE_DESC = 'YouTube.com'
 362     _VALID_URL = r"""(?x)^
 363                      (
 364                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 365                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 366                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 367                             (?:www\.)?pwnyoutube\.com/|
 368                             (?:www\.)?hooktube\.com/|
 369                             (?:www\.)?yourepeat\.com/|
 370                             tube\.majestyc\.net/|
 371                             (?:(?:www|dev)\.)?invidio\.us/|
 372                             (?:www\.)?invidiou\.sh/|
 373                             (?:www\.)?invidious\.snopyta\.org/|
 374                             (?:www\.)?invidious\.kabi\.tk/|
 375                             (?:www\.)?vid\.wxzm\.sx/|
 376                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 377                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 378                          (?:                                                  # the various things that can precede the ID:
 379                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 380                              |(?:                                             # or the v= param in all its forms
 381                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 382                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 383                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 384                                  v=
 385                              )
 386                          ))
 387                          |(?:
 388                             youtu\.be|                                        # just youtu.be/xxxx
 389                             vid\.plus|                                        # or vid.plus/xxxx
 390                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 391                          )/
 392                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 393                          )
 394                      )?                                                       # all until now is optional -> you can pass the naked ID
 395                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 396                      (?!.*?\blist=
 397                         (?:
 398                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 399                             WL                                                # WL are handled by the watch later IE
 400                         )
 401                      )
 402                      (?(1).+)?                                                # if we found the ID, everything can follow
 403                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 404     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 405     _formats = {
 406         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 407         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 408         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 409         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 410         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 411         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 412         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 413         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 414         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 415         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 416         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 417         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 418         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 419         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 420         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 421         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 422         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 423         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 424
 425
 426         # 3D videos
 427         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 428         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 429         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 430         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 431         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 432         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 433         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 434
 435         # Apple HTTP Live Streaming
 436         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 437         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 438         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 439         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 440         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 441         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 442         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 443         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 444
 445         # DASH mp4 video
 446         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 447         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 448         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 449         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 450         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 451         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 452         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 453         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 454         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 455         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 456         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 457         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 458
 459         # Dash mp4 audio
 460         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 461         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 462         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 463         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 464         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 465         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 466         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 467
 468         # Dash webm
 469         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 470         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 471         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 472         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 473         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 474         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 475         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 476         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 477         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 478         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 479         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 480         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 481         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 482         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 483         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 484         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 485         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 486         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 487         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 488         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 489         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 490         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 491
 492         # Dash webm audio
 493         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 494         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 495
 496         # Dash webm audio with opus inside
 497         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 498         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 499         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 500
 501         # RTMP (unnamed)
 502         '_rtmp': {'protocol': 'rtmp'},
 503
 504         # av01 video only formats sometimes served with "unknown" codecs
 505         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 506         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 507         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 508         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 509     }
 510     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 511
 512     _GEO_BYPASS = False
 513
 514     IE_NAME = 'youtube'
 515     _TESTS = [
 516         {
 517             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 518             'info_dict': {
 519                 'id': 'BaW_jenozKc',
 520                 'ext': 'mp4',
 521                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 522                 'uploader': 'Philipp Hagemeister',
 523                 'uploader_id': 'phihag',
 524                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 525                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 526                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 527                 'upload_date': '20121002',
 528                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 529                 'categories': ['Science & Technology'],
 530                 'tags': ['youtube-dl'],
 531                 'duration': 10,
 532                 'view_count': int,
 533                 'like_count': int,
 534                 'dislike_count': int,
 535                 'start_time': 1,
 536                 'end_time': 9,
 537             }
 538         },
 539         {
 540             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 541             'note': 'Test generic use_cipher_signature video (#897)',
 542             'info_dict': {
 543                 'id': 'UxxajLWwzqY',
 544                 'ext': 'mp4',
 545                 'upload_date': '20120506',
 546                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 547                 'alt_title': 'I Love It (feat. Charli XCX)',
 548                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 549                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 550                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 551                          'iconic ep', 'iconic', 'love', 'it'],
 552                 'duration': 180,
 553                 'uploader': 'Icona Pop',
 554                 'uploader_id': 'IconaPop',
 555                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 556                 'creator': 'Icona Pop',
 557                 'track': 'I Love It (feat. Charli XCX)',
 558                 'artist': 'Icona Pop',
 559             }
 560         },
 561         {
 562             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 563             'note': 'Test VEVO video with age protection (#956)',
 564             'info_dict': {
 565                 'id': '07FYdnEawAQ',
 566                 'ext': 'mp4',
 567                 'upload_date': '20130703',
 568                 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
 569                 'alt_title': 'Tunnel Vision',
 570                 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
 571                 'duration': 419,
 572                 'uploader': 'justintimberlakeVEVO',
 573                 'uploader_id': 'justintimberlakeVEVO',
 574                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 575                 'creator': 'Justin Timberlake',
 576                 'track': 'Tunnel Vision',
 577                 'artist': 'Justin Timberlake',
 578                 'age_limit': 18,
 579             }
 580         },
 581         {
 582             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 583             'note': 'Embed-only video (#1746)',
 584             'info_dict': {
 585                 'id': 'yZIXLfi8CZQ',
 586                 'ext': 'mp4',
 587                 'upload_date': '20120608',
 588                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 589                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 590                 'uploader': 'SET India',
 591                 'uploader_id': 'setindia',
 592                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 593                 'age_limit': 18,
 594             }
 595         },
 596         {
 597             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 598             'note': 'Use the first video ID in the URL',
 599             'info_dict': {
 600                 'id': 'BaW_jenozKc',
 601                 'ext': 'mp4',
 602                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 603                 'uploader': 'Philipp Hagemeister',
 604                 'uploader_id': 'phihag',
 605                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 606                 'upload_date': '20121002',
 607                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 608                 'categories': ['Science & Technology'],
 609                 'tags': ['youtube-dl'],
 610                 'duration': 10,
 611                 'view_count': int,
 612                 'like_count': int,
 613                 'dislike_count': int,
 614             },
 615             'params': {
 616                 'skip_download': True,
 617             },
 618         },
 619         {
 620             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 621             'note': '256k DASH audio (format 141) via DASH manifest',
 622             'info_dict': {
 623                 'id': 'a9LDPn-MO4I',
 624                 'ext': 'm4a',
 625                 'upload_date': '20121002',
 626                 'uploader_id': '8KVIDEO',
 627                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 628                 'description': '',
 629                 'uploader': '8KVIDEO',
 630                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 631             },
 632             'params': {
 633                 'youtube_include_dash_manifest': True,
 634                 'format': '141',
 635             },
 636             'skip': 'format 141 not served anymore',
 637         },
 638         # DASH manifest with encrypted signature
 639         {
 640             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 641             'info_dict': {
 642                 'id': 'IB3lcPjvWLA',
 643                 'ext': 'm4a',
 644                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
 645                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
 646                 'duration': 244,
 647                 'uploader': 'AfrojackVEVO',
 648                 'uploader_id': 'AfrojackVEVO',
 649                 'upload_date': '20131011',
 650             },
 651             'params': {
 652                 'youtube_include_dash_manifest': True,
 653                 'format': '141/bestaudio[ext=m4a]',
 654             },
 655         },
 656         # JS player signature function name containing $
 657         {
 658             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 659             'info_dict': {
 660                 'id': 'nfWlot6h_JM',
 661                 'ext': 'm4a',
 662                 'title': 'Taylor Swift - Shake It Off',
 663                 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
 664                 'duration': 242,
 665                 'uploader': 'TaylorSwiftVEVO',
 666                 'uploader_id': 'TaylorSwiftVEVO',
 667                 'upload_date': '20140818',
 668                 'creator': 'Taylor Swift',
 669             },
 670             'params': {
 671                 'youtube_include_dash_manifest': True,
 672                 'format': '141/bestaudio[ext=m4a]',
 673             },
 674         },
 675         # Controversy video
 676         {
 677             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 678             'info_dict': {
 679                 'id': 'T4XJQO3qol8',
 680                 'ext': 'mp4',
 681                 'duration': 219,
 682                 'upload_date': '20100909',
 683                 'uploader': 'Amazing Atheist',
 684                 'uploader_id': 'TheAmazingAtheist',
 685                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 686                 'title': 'Burning Everyone\'s Koran',
 687                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 688             }
 689         },
 690         # Normal age-gate video (No vevo, embed allowed)
 691         {
 692             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 693             'info_dict': {
 694                 'id': 'HtVdAasjOgU',
 695                 'ext': 'mp4',
 696                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 697                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 698                 'duration': 142,
 699                 'uploader': 'The Witcher',
 700                 'uploader_id': 'WitcherGame',
 701                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 702                 'upload_date': '20140605',
 703                 'age_limit': 18,
 704             },
 705         },
 706         # Age-gate video with encrypted signature
 707         {
 708             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 709             'info_dict': {
 710                 'id': '6kLq3WMV1nU',
 711                 'ext': 'mp4',
 712                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 713                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 714                 'duration': 246,
 715                 'uploader': 'LloydVEVO',
 716                 'uploader_id': 'LloydVEVO',
 717                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 718                 'upload_date': '20110629',
 719                 'age_limit': 18,
 720             },
 721         },
 722         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
 723         # YouTube Red ad is not captured for creator
 724         {
 725             'url': '__2ABJjxzNo',
 726             'info_dict': {
 727                 'id': '__2ABJjxzNo',
 728                 'ext': 'mp4',
 729                 'duration': 266,
 730                 'upload_date': '20100430',
 731                 'uploader_id': 'deadmau5',
 732                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 733                 'creator': 'deadmau5',
 734                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 735                 'uploader': 'deadmau5',
 736                 'title': 'Deadmau5 - Some Chords (HD)',
 737                 'alt_title': 'Some Chords',
 738             },
 739             'expected_warnings': [
 740                 'DASH manifest missing',
 741             ]
 742         },
 743         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
 744         {
 745             'url': 'lqQg6PlCWgI',
 746             'info_dict': {
 747                 'id': 'lqQg6PlCWgI',
 748                 'ext': 'mp4',
 749                 'duration': 6085,
 750                 'upload_date': '20150827',
 751                 'uploader_id': 'olympic',
 752                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 753                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 754                 'uploader': 'Olympic',
 755                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 756             },
 757             'params': {
 758                 'skip_download': 'requires avconv',
 759             }
 760         },
 761         # Non-square pixels
 762         {
 763             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 764             'info_dict': {
 765                 'id': '_b-2C3KPAM0',
 766                 'ext': 'mp4',
 767                 'stretched_ratio': 16 / 9.,
 768                 'duration': 85,
 769                 'upload_date': '20110310',
 770                 'uploader_id': 'AllenMeow',
 771                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 772                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 773                 'uploader': '孫ᄋᄅ',
 774                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 775             },
 776         },
 777         # url_encoded_fmt_stream_map is empty string
 778         {
 779             'url': 'qEJwOuvDf7I',
 780             'info_dict': {
 781                 'id': 'qEJwOuvDf7I',
 782                 'ext': 'webm',
 783                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 784                 'description': '',
 785                 'upload_date': '20150404',
 786                 'uploader_id': 'spbelect',
 787                 'uploader': 'Наблюдатели Петербурга',
 788             },
 789             'params': {
 790                 'skip_download': 'requires avconv',
 791             },
 792             'skip': 'This live event has ended.',
 793         },
 794         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
 795         {
 796             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 797             'info_dict': {
 798                 'id': 'FIl7x6_3R5Y',
 799                 'ext': 'webm',
 800                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 801                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 802                 'duration': 220,
 803                 'upload_date': '20150625',
 804                 'uploader_id': 'dorappi2000',
 805                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 806                 'uploader': 'dorappi2000',
 807                 'formats': 'mincount:31',
 808             },
 809             'skip': 'not actual anymore',
 810         },
 811         # DASH manifest with segment_list
 812         {
 813             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 814             'md5': '8ce563a1d667b599d21064e982ab9e31',
 815             'info_dict': {
 816                 'id': 'CsmdDsKjzN8',
 817                 'ext': 'mp4',
 818                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 819                 'uploader': 'Airtek',
 820                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 821                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 822                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 823             },
 824             'params': {
 825                 'youtube_include_dash_manifest': True,
 826                 'format': '135',  # bestvideo
 827             },
 828             'skip': 'This live event has ended.',
 829         },
 830         {
 831             # Multifeed videos (multiple cameras), URL is for Main Camera
 832             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 833             'info_dict': {
 834                 'id': 'jqWvoWXjCVs',
 835                 'title': 'teamPGP: Rocket League Noob Stream',
 836                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 837             },
 838             'playlist': [{
 839                 'info_dict': {
 840                     'id': 'jqWvoWXjCVs',
 841                     'ext': 'mp4',
 842                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 843                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 844                     'duration': 7335,
 845                     'upload_date': '20150721',
 846                     'uploader': 'Beer Games Beer',
 847                     'uploader_id': 'beergamesbeer',
 848                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 849                     'license': 'Standard YouTube License',
 850                 },
 851             }, {
 852                 'info_dict': {
 853                     'id': '6h8e8xoXJzg',
 854                     'ext': 'mp4',
 855                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 856                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 857                     'duration': 7337,
 858                     'upload_date': '20150721',
 859                     'uploader': 'Beer Games Beer',
 860                     'uploader_id': 'beergamesbeer',
 861                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 862                     'license': 'Standard YouTube License',
 863                 },
 864             }, {
 865                 'info_dict': {
 866                     'id': 'PUOgX5z9xZw',
 867                     'ext': 'mp4',
 868                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 869                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 870                     'duration': 7337,
 871                     'upload_date': '20150721',
 872                     'uploader': 'Beer Games Beer',
 873                     'uploader_id': 'beergamesbeer',
 874                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 875                     'license': 'Standard YouTube License',
 876                 },
 877             }, {
 878                 'info_dict': {
 879                     'id': 'teuwxikvS5k',
 880                     'ext': 'mp4',
 881                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 882                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 883                     'duration': 7334,
 884                     'upload_date': '20150721',
 885                     'uploader': 'Beer Games Beer',
 886                     'uploader_id': 'beergamesbeer',
 887                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 888                     'license': 'Standard YouTube License',
 889                 },
 890             }],
 891             'params': {
 892                 'skip_download': True,
 893             },
 894             'skip': 'This video is not available.',
 895         },
 896         {
 897             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
 898             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 899             'info_dict': {
 900                 'id': 'gVfLd0zydlo',
 901                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 902             },
 903             'playlist_count': 2,
 904             'skip': 'Not multifeed anymore',
 905         },
 906         {
 907             'url': 'https://vid.plus/FlRa-iH7PGw',
 908             'only_matching': True,
 909         },
 910         {
 911             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 912             'only_matching': True,
 913         },
 914         {
 915             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 916             # Also tests cut-off URL expansion in video description (see
 917             # https://github.com/ytdl-org/youtube-dl/issues/1892,
 918             # https://github.com/ytdl-org/youtube-dl/issues/8164)
 919             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 920             'info_dict': {
 921                 'id': 'lsguqyKfVQg',
 922                 'ext': 'mp4',
 923                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 924                 'alt_title': 'Dark Walk - Position Music',
 925                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 926                 'duration': 133,
 927                 'upload_date': '20151119',
 928                 'uploader_id': 'IronSoulElf',
 929                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 930                 'uploader': 'IronSoulElf',
 931                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 932                 'track': 'Dark Walk - Position Music',
 933                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 934                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
 935             },
 936             'params': {
 937                 'skip_download': True,
 938             },
 939         },
 940         {
 941             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
 942             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 943             'only_matching': True,
 944         },
 945         {
 946             # Video with yt:stretch=17:0
 947             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 948             'info_dict': {
 949                 'id': 'Q39EVAstoRM',
 950                 'ext': 'mp4',
 951                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 952                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 953                 'upload_date': '20151107',
 954                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 955                 'uploader': 'CH GAMER DROID',
 956             },
 957             'params': {
 958                 'skip_download': True,
 959             },
 960             'skip': 'This video does not exist.',
 961         },
 962         {
 963             # Video licensed under Creative Commons
 964             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 965             'info_dict': {
 966                 'id': 'M4gD1WSo5mA',
 967                 'ext': 'mp4',
 968                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 969                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 970                 'duration': 721,
 971                 'upload_date': '20150127',
 972                 'uploader_id': 'BerkmanCenter',
 973                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 974                 'uploader': 'The Berkman Klein Center for Internet & Society',
 975                 'license': 'Creative Commons Attribution license (reuse allowed)',
 976             },
 977             'params': {
 978                 'skip_download': True,
 979             },
 980         },
 981         {
 982             # Channel-like uploader_url
 983             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 984             'info_dict': {
 985                 'id': 'eQcmzGIKrzg',
 986                 'ext': 'mp4',
 987                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 988                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 989                 'duration': 4060,
 990                 'upload_date': '20151119',
 991                 'uploader': 'Bernie Sanders',
 992                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 993                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 994                 'license': 'Creative Commons Attribution license (reuse allowed)',
 995             },
 996             'params': {
 997                 'skip_download': True,
 998             },
 999         },
1000         {
1001             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1002             'only_matching': True,
1003         },
1004         {
1005             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1006             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1007             'only_matching': True,
1008         },
1009         {
1010             # Rental video preview
1011             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1012             'info_dict': {
1013                 'id': 'uGpuVWrhIzE',
1014                 'ext': 'mp4',
1015                 'title': 'Piku - Trailer',
1016                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1017                 'upload_date': '20150811',
1018                 'uploader': 'FlixMatrix',
1019                 'uploader_id': 'FlixMatrixKaravan',
1020                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1021                 'license': 'Standard YouTube License',
1022             },
1023             'params': {
1024                 'skip_download': True,
1025             },
1026             'skip': 'This video is not available.',
1027         },
1028         {
1029             # YouTube Red video with episode data
1030             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1031             'info_dict': {
1032                 'id': 'iqKdEhx-dD4',
1033                 'ext': 'mp4',
1034                 'title': 'Isolation - Mind Field (Ep 1)',
1035                 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1036                 'duration': 2085,
1037                 'upload_date': '20170118',
1038                 'uploader': 'Vsauce',
1039                 'uploader_id': 'Vsauce',
1040                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1041                 'series': 'Mind Field',
1042                 'season_number': 1,
1043                 'episode_number': 1,
1044             },
1045             'params': {
1046                 'skip_download': True,
1047             },
1048             'expected_warnings': [
1049                 'Skipping DASH manifest',
1050             ],
1051         },
1052         {
1053             # The following content has been identified by the YouTube community
1054             # as inappropriate or offensive to some audiences.
1055             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1056             'info_dict': {
1057                 'id': '6SJNVb0GnPI',
1058                 'ext': 'mp4',
1059                 'title': 'Race Differences in Intelligence',
1060                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1061                 'duration': 965,
1062                 'upload_date': '20140124',
1063                 'uploader': 'New Century Foundation',
1064                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1065                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1066             },
1067             'params': {
1068                 'skip_download': True,
1069             },
1070         },
1071         {
1072             # itag 212
1073             'url': '1t24XAntNCY',
1074             'only_matching': True,
1075         },
1076         {
1077             # geo restricted to JP
1078             'url': 'sJL6WA-aGkQ',
1079             'only_matching': True,
1080         },
1081         {
1082             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1083             'only_matching': True,
1084         },
1085         {
1086             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1087             'only_matching': True,
1088         },
1089         {
1090             # DRM protected
1091             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1092             'only_matching': True,
1093         },
1094         {
1095             # Video with unsupported adaptive stream type formats
1096             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1097             'info_dict': {
1098                 'id': 'Z4Vy8R84T1U',
1099                 'ext': 'mp4',
1100                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1101                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1102                 'duration': 433,
1103                 'upload_date': '20130923',
1104                 'uploader': 'Amelia Putri Harwita',
1105                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1106                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1107                 'formats': 'maxcount:10',
1108             },
1109             'params': {
1110                 'skip_download': True,
1111                 'youtube_include_dash_manifest': False,
1112             },
1113         },
1114         {
1115             # Youtube Music Auto-generated description
1116             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1117             'info_dict': {
1118                 'id': 'MgNrAu2pzNs',
1119                 'ext': 'mp4',
1120                 'title': 'Voyeur Girl',
1121                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1122                 'upload_date': '20190312',
1123                 'uploader': 'Various Artists - Topic',
1124                 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1125                 'artist': 'Stephen',
1126                 'track': 'Voyeur Girl',
1127                 'album': 'it\'s too much love to know my dear',
1128                 'release_date': '20190313',
1129                 'release_year': 2019,
1130             },
1131             'params': {
1132                 'skip_download': True,
1133             },
1134         },
1135         {
1136             # Youtube Music Auto-generated description
1137             # Retrieve 'artist' field from 'Artist:' in video description
1138             # when it is present on youtube music video
1139             'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1140             'info_dict': {
1141                 'id': 'k0jLE7tTwjY',
1142                 'ext': 'mp4',
1143                 'title': 'Latch Feat. Sam Smith',
1144                 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1145                 'upload_date': '20150110',
1146                 'uploader': 'Various Artists - Topic',
1147                 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1148                 'artist': 'Disclosure',
1149                 'track': 'Latch Feat. Sam Smith',
1150                 'album': 'Latch Featuring Sam Smith',
1151                 'release_date': '20121008',
1152                 'release_year': 2012,
1153             },
1154             'params': {
1155                 'skip_download': True,
1156             },
1157         },
1158         {
1159             # Youtube Music Auto-generated description
1160             # handle multiple artists on youtube music video
1161             'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1162             'info_dict': {
1163                 'id': '74qn0eJSjpA',
1164                 'ext': 'mp4',
1165                 'title': 'Eastside',
1166                 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1167                 'upload_date': '20180710',
1168                 'uploader': 'Benny Blanco - Topic',
1169                 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1170                 'artist': 'benny blanco, Halsey, Khalid',
1171                 'track': 'Eastside',
1172                 'album': 'Eastside',
1173                 'release_date': '20180713',
1174                 'release_year': 2018,
1175             },
1176             'params': {
1177                 'skip_download': True,
1178             },
1179         },
1180         {
1181             # Youtube Music Auto-generated description
1182             # handle youtube music video with release_year and no release_date
1183             'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1184             'info_dict': {
1185                 'id': '-hcAI0g-f5M',
1186                 'ext': 'mp4',
1187                 'title': 'Put It On Me',
1188                 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1189                 'upload_date': '20180426',
1190                 'uploader': 'Matt Maeson - Topic',
1191                 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1192                 'artist': 'Matt Maeson',
1193                 'track': 'Put It On Me',
1194                 'album': 'The Hearse',
1195                 'release_date': None,
1196                 'release_year': 2018,
1197             },
1198             'params': {
1199                 'skip_download': True,
1200             },
1201         },
1202     ]
1203
1204     def __init__(self, *args, **kwargs):
1205         super(YoutubeIE, self).__init__(*args, **kwargs)
1206         self._player_cache = {}
1207
1208     def report_video_info_webpage_download(self, video_id):
1209         """Report attempt to download video info webpage."""
1210         self.to_screen('%s: Downloading video info webpage' % video_id)
1211
1212     def report_information_extraction(self, video_id):
1213         """Report attempt to extract video information."""
1214         self.to_screen('%s: Extracting video information' % video_id)
1215
1216     def report_unavailable_format(self, video_id, format):
1217         """Report extracted video URL."""
1218         self.to_screen('%s: Format %s not available' % (video_id, format))
1219
1220     def report_rtmp_download(self):
1221         """Indicate the download will use the RTMP protocol."""
1222         self.to_screen('RTMP download detected')
1223
1224     def _signature_cache_id(self, example_sig):
1225         """ Return a string representation of a signature """
1226         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1227
1228     def _extract_signature_function(self, video_id, player_url, example_sig):
1229         id_m = re.match(
1230             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1231             player_url)
1232         if not id_m:
1233             raise ExtractorError('Cannot identify player %r' % player_url)
1234         player_type = id_m.group('ext')
1235         player_id = id_m.group('id')
1236
1237         # Read from filesystem cache
1238         func_id = '%s_%s_%s' % (
1239             player_type, player_id, self._signature_cache_id(example_sig))
1240         assert os.path.basename(func_id) == func_id
1241
1242         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1243         if cache_spec is not None:
1244             return lambda s: ''.join(s[i] for i in cache_spec)
1245
1246         download_note = (
1247             'Downloading player %s' % player_url
1248             if self._downloader.params.get('verbose') else
1249             'Downloading %s player %s' % (player_type, player_id)
1250         )
1251         if player_type == 'js':
1252             code = self._download_webpage(
1253                 player_url, video_id,
1254                 note=download_note,
1255                 errnote='Download of %s failed' % player_url)
1256             res = self._parse_sig_js(code)
1257         elif player_type == 'swf':
1258             urlh = self._request_webpage(
1259                 player_url, video_id,
1260                 note=download_note,
1261                 errnote='Download of %s failed' % player_url)
1262             code = urlh.read()
1263             res = self._parse_sig_swf(code)
1264         else:
1265             assert False, 'Invalid player type %r' % player_type
1266
1267         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1268         cache_res = res(test_string)
1269         cache_spec = [ord(c) for c in cache_res]
1270
1271         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1272         return res
1273
1274     def _print_sig_code(self, func, example_sig):
1275         def gen_sig_code(idxs):
1276             def _genslice(start, end, step):
1277                 starts = '' if start == 0 else str(start)
1278                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1279                 steps = '' if step == 1 else (':%d' % step)
1280                 return 's[%s%s%s]' % (starts, ends, steps)
1281
1282             step = None
1283             # Quelch pyflakes warnings - start will be set when step is set
1284             start = '(Never used)'
1285             for i, prev in zip(idxs[1:], idxs[:-1]):
1286                 if step is not None:
1287                     if i - prev == step:
1288                         continue
1289                     yield _genslice(start, prev, step)
1290                     step = None
1291                     continue
1292                 if i - prev in [-1, 1]:
1293                     step = i - prev
1294                     start = prev
1295                     continue
1296                 else:
1297                     yield 's[%d]' % prev
1298             if step is None:
1299                 yield 's[%d]' % i
1300             else:
1301                 yield _genslice(start, i, step)
1302
1303         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1304         cache_res = func(test_string)
1305         cache_spec = [ord(c) for c in cache_res]
1306         expr_code = ' + '.join(gen_sig_code(cache_spec))
1307         signature_id_tuple = '(%s)' % (
1308             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1309         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1310                 '    return %s\n') % (signature_id_tuple, expr_code)
1311         self.to_screen('Extracted signature function:\n' + code)
1312
1313     def _parse_sig_js(self, jscode):
1314         funcname = self._search_regex(
1315             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1316              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1317              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1318              # Obsolete patterns
1319              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1320              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1321              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1322              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1323              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1324              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1325              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1326              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1327             jscode, 'Initial JS player signature function name', group='sig')
1328
1329         jsi = JSInterpreter(jscode)
1330         initial_function = jsi.extract_function(funcname)
1331         return lambda s: initial_function([s])
1332
1333     def _parse_sig_swf(self, file_contents):
1334         swfi = SWFInterpreter(file_contents)
1335         TARGET_CLASSNAME = 'SignatureDecipher'
1336         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1337         initial_function = swfi.extract_function(searched_class, 'decipher')
1338         return lambda s: initial_function([s])
1339
1340     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1341         """Turn the encrypted s field into a working signature"""
1342
1343         if player_url is None:
1344             raise ExtractorError('Cannot decrypt signature without player_url')
1345
1346         if player_url.startswith('//'):
1347             player_url = 'https:' + player_url
1348         elif not re.match(r'https?://', player_url):
1349             player_url = compat_urlparse.urljoin(
1350                 'https://www.youtube.com', player_url)
1351         try:
1352             player_id = (player_url, self._signature_cache_id(s))
1353             if player_id not in self._player_cache:
1354                 func = self._extract_signature_function(
1355                     video_id, player_url, s
1356                 )
1357                 self._player_cache[player_id] = func
1358             func = self._player_cache[player_id]
1359             if self._downloader.params.get('youtube_print_sig_code'):
1360                 self._print_sig_code(func, s)
1361             return func(s)
1362         except Exception as e:
1363             tb = traceback.format_exc()
1364             raise ExtractorError(
1365                 'Signature extraction failed: ' + tb, cause=e)
1366
1367     def _get_subtitles(self, video_id, webpage):
1368         try:
1369             subs_doc = self._download_xml(
1370                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1371                 video_id, note=False)
1372         except ExtractorError as err:
1373             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1374             return {}
1375
1376         sub_lang_list = {}
1377         for track in subs_doc.findall('track'):
1378             lang = track.attrib['lang_code']
1379             if lang in sub_lang_list:
1380                 continue
1381             sub_formats = []
1382             for ext in self._SUBTITLE_FORMATS:
1383                 params = compat_urllib_parse_urlencode({
1384                     'lang': lang,
1385                     'v': video_id,
1386                     'fmt': ext,
1387                     'name': track.attrib['name'].encode('utf-8'),
1388                 })
1389                 sub_formats.append({
1390                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1391                     'ext': ext,
1392                 })
1393             sub_lang_list[lang] = sub_formats
1394         if not sub_lang_list:
1395             self._downloader.report_warning('video doesn\'t have subtitles')
1396             return {}
1397         return sub_lang_list
1398
1399     def _get_ytplayer_config(self, video_id, webpage):
1400         patterns = (
1401             # User data may contain arbitrary character sequences that may affect
1402             # JSON extraction with regex, e.g. when '};' is contained the second
1403             # regex won't capture the whole JSON. Yet working around by trying more
1404             # concrete regex first keeping in mind proper quoted string handling
1405             # to be implemented in future that will replace this workaround (see
1406             # https://github.com/ytdl-org/youtube-dl/issues/7468,
1407             # https://github.com/ytdl-org/youtube-dl/pull/7599)
1408             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1409             r';ytplayer\.config\s*=\s*({.+?});',
1410         )
1411         config = self._search_regex(
1412             patterns, webpage, 'ytplayer.config', default=None)
1413         if config:
1414             return self._parse_json(
1415                 uppercase_escape(config), video_id, fatal=False)
1416
1417     def _get_automatic_captions(self, video_id, webpage):
1418         """We need the webpage for getting the captions url, pass it as an
1419            argument to speed up the process."""
1420         self.to_screen('%s: Looking for automatic captions' % video_id)
1421         player_config = self._get_ytplayer_config(video_id, webpage)
1422         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1423         if not player_config:
1424             self._downloader.report_warning(err_msg)
1425             return {}
1426         try:
1427             args = player_config['args']
1428             caption_url = args.get('ttsurl')
1429             if caption_url:
1430                 timestamp = args['timestamp']
1431                 # We get the available subtitles
1432                 list_params = compat_urllib_parse_urlencode({
1433                     'type': 'list',
1434                     'tlangs': 1,
1435                     'asrs': 1,
1436                 })
1437                 list_url = caption_url + '&' + list_params
1438                 caption_list = self._download_xml(list_url, video_id)
1439                 original_lang_node = caption_list.find('track')
1440                 if original_lang_node is None:
1441                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1442                     return {}
1443                 original_lang = original_lang_node.attrib['lang_code']
1444                 caption_kind = original_lang_node.attrib.get('kind', '')
1445
1446                 sub_lang_list = {}
1447                 for lang_node in caption_list.findall('target'):
1448                     sub_lang = lang_node.attrib['lang_code']
1449                     sub_formats = []
1450                     for ext in self._SUBTITLE_FORMATS:
1451                         params = compat_urllib_parse_urlencode({
1452                             'lang': original_lang,
1453                             'tlang': sub_lang,
1454                             'fmt': ext,
1455                             'ts': timestamp,
1456                             'kind': caption_kind,
1457                         })
1458                         sub_formats.append({
1459                             'url': caption_url + '&' + params,
1460                             'ext': ext,
1461                         })
1462                     sub_lang_list[sub_lang] = sub_formats
1463                 return sub_lang_list
1464
1465             def make_captions(sub_url, sub_langs):
1466                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1467                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1468                 captions = {}
1469                 for sub_lang in sub_langs:
1470                     sub_formats = []
1471                     for ext in self._SUBTITLE_FORMATS:
1472                         caption_qs.update({
1473                             'tlang': [sub_lang],
1474                             'fmt': [ext],
1475                         })
1476                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1477                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1478                         sub_formats.append({
1479                             'url': sub_url,
1480                             'ext': ext,
1481                         })
1482                     captions[sub_lang] = sub_formats
1483                 return captions
1484
1485             # New captions format as of 22.06.2017
1486             player_response = args.get('player_response')
1487             if player_response and isinstance(player_response, compat_str):
1488                 player_response = self._parse_json(
1489                     player_response, video_id, fatal=False)
1490                 if player_response:
1491                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1492                     base_url = renderer['captionTracks'][0]['baseUrl']
1493                     sub_lang_list = []
1494                     for lang in renderer['translationLanguages']:
1495                         lang_code = lang.get('languageCode')
1496                         if lang_code:
1497                             sub_lang_list.append(lang_code)
1498                     return make_captions(base_url, sub_lang_list)
1499
1500             # Some videos don't provide ttsurl but rather caption_tracks and
1501             # caption_translation_languages (e.g. 20LmZk1hakA)
1502             # Does not used anymore as of 22.06.2017
1503             caption_tracks = args['caption_tracks']
1504             caption_translation_languages = args['caption_translation_languages']
1505             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1506             sub_lang_list = []
1507             for lang in caption_translation_languages.split(','):
1508                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1509                 sub_lang = lang_qs.get('lc', [None])[0]
1510                 if sub_lang:
1511                     sub_lang_list.append(sub_lang)
1512             return make_captions(caption_url, sub_lang_list)
1513         # An extractor error can be raise by the download process if there are
1514         # no automatic captions but there are subtitles
1515         except (KeyError, IndexError, ExtractorError):
1516             self._downloader.report_warning(err_msg)
1517             return {}
1518
1519     def _mark_watched(self, video_id, video_info, player_response):
1520         playback_url = url_or_none(try_get(
1521             player_response,
1522             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1523             video_info, lambda x: x['videostats_playback_base_url'][0]))
1524         if not playback_url:
1525             return
1526         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1527         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1528
1529         # cpn generation algorithm is reverse engineered from base.js.
1530         # In fact it works even with dummy cpn.
1531         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1532         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1533
1534         qs.update({
1535             'ver': ['2'],
1536             'cpn': [cpn],
1537         })
1538         playback_url = compat_urlparse.urlunparse(
1539             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1540
1541         self._download_webpage(
1542             playback_url, video_id, 'Marking watched',
1543             'Unable to mark watched', fatal=False)
1544
1545     @staticmethod
1546     def _extract_urls(webpage):
1547         # Embedded YouTube player
1548         entries = [
1549             unescapeHTML(mobj.group('url'))
1550             for mobj in re.finditer(r'''(?x)
1551             (?:
1552                 <iframe[^>]+?src=|
1553                 data-video-url=|
1554                 <embed[^>]+?src=|
1555                 embedSWF\(?:\s*|
1556                 <object[^>]+data=|
1557                 new\s+SWFObject\(
1558             )
1559             (["\'])
1560                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1561                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1562             \1''', webpage)]
1563
1564         # lazyYT YouTube embed
1565         entries.extend(list(map(
1566             unescapeHTML,
1567             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1568
1569         # Wordpress "YouTube Video Importer" plugin
1570         matches = re.findall(r'''(?x)<div[^>]+
1571             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1572             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1573         entries.extend(m[-1] for m in matches)
1574
1575         return entries
1576
1577     @staticmethod
1578     def _extract_url(webpage):
1579         urls = YoutubeIE._extract_urls(webpage)
1580         return urls[0] if urls else None
1581
1582     @classmethod
1583     def extract_id(cls, url):
1584         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1585         if mobj is None:
1586             raise ExtractorError('Invalid URL: %s' % url)
1587         video_id = mobj.group(2)
1588         return video_id
1589
1590     def _extract_annotations(self, video_id):
1591         return self._download_webpage(
1592             'https://www.youtube.com/annotations_invideo', video_id,
1593             note='Downloading annotations',
1594             errnote='Unable to download video annotations', fatal=False,
1595             query={
1596                 'features': 1,
1597                 'legacy': 1,
1598                 'video_id': video_id,
1599             })
1600
1601     @staticmethod
1602     def _extract_chapters(description, duration):
1603         if not description:
1604             return None
1605         chapter_lines = re.findall(
1606             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1607             description)
1608         if not chapter_lines:
1609             return None
1610         chapters = []
1611         for next_num, (chapter_line, time_point) in enumerate(
1612                 chapter_lines, start=1):
1613             start_time = parse_duration(time_point)
1614             if start_time is None:
1615                 continue
1616             if start_time > duration:
1617                 break
1618             end_time = (duration if next_num == len(chapter_lines)
1619                         else parse_duration(chapter_lines[next_num][1]))
1620             if end_time is None:
1621                 continue
1622             if end_time > duration:
1623                 end_time = duration
1624             if start_time > end_time:
1625                 break
1626             chapter_title = re.sub(
1627                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1628             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1629             chapters.append({
1630                 'start_time': start_time,
1631                 'end_time': end_time,
1632                 'title': chapter_title,
1633             })
1634         return chapters
1635
1636     def _real_extract(self, url):
1637         url, smuggled_data = unsmuggle_url(url, {})
1638
1639         proto = (
1640             'http' if self._downloader.params.get('prefer_insecure', False)
1641             else 'https')
1642
1643         start_time = None
1644         end_time = None
1645         parsed_url = compat_urllib_parse_urlparse(url)
1646         for component in [parsed_url.fragment, parsed_url.query]:
1647             query = compat_parse_qs(component)
1648             if start_time is None and 't' in query:
1649                 start_time = parse_duration(query['t'][0])
1650             if start_time is None and 'start' in query:
1651                 start_time = parse_duration(query['start'][0])
1652             if end_time is None and 'end' in query:
1653                 end_time = parse_duration(query['end'][0])
1654
1655         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1656         mobj = re.search(self._NEXT_URL_RE, url)
1657         if mobj:
1658             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1659         video_id = self.extract_id(url)
1660
1661         # Get video webpage
1662         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1663         video_webpage = self._download_webpage(url, video_id)
1664
1665         # Attempt to extract SWF player URL
1666         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1667         if mobj is not None:
1668             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1669         else:
1670             player_url = None
1671
1672         dash_mpds = []
1673
1674         def add_dash_mpd(video_info):
1675             dash_mpd = video_info.get('dashmpd')
1676             if dash_mpd and dash_mpd[0] not in dash_mpds:
1677                 dash_mpds.append(dash_mpd[0])
1678
1679         def add_dash_mpd_pr(pl_response):
1680             dash_mpd = url_or_none(try_get(
1681                 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1682                 compat_str))
1683             if dash_mpd and dash_mpd not in dash_mpds:
1684                 dash_mpds.append(dash_mpd)
1685
1686         is_live = None
1687         view_count = None
1688
1689         def extract_view_count(v_info):
1690             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1691
1692         def extract_token(v_info):
1693             return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1694
1695         player_response = {}
1696
1697         # Get video info
1698         embed_webpage = None
1699         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1700             age_gate = True
1701             # We simulate the access to the video from www.youtube.com/v/{video_id}
1702             # this can be viewed without login into Youtube
1703             url = proto + '://www.youtube.com/embed/%s' % video_id
1704             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1705             data = compat_urllib_parse_urlencode({
1706                 'video_id': video_id,
1707                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1708                 'sts': self._search_regex(
1709                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1710             })
1711             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1712             video_info_webpage = self._download_webpage(
1713                 video_info_url, video_id,
1714                 note='Refetching age-gated info webpage',
1715                 errnote='unable to download video info webpage')
1716             video_info = compat_parse_qs(video_info_webpage)
1717             add_dash_mpd(video_info)
1718         else:
1719             age_gate = False
1720             video_info = None
1721             sts = None
1722             # Try looking directly into the video webpage
1723             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1724             if ytplayer_config:
1725                 args = ytplayer_config['args']
1726                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1727                     # Convert to the same format returned by compat_parse_qs
1728                     video_info = dict((k, [v]) for k, v in args.items())
1729                     add_dash_mpd(video_info)
1730                 # Rental video is not rented but preview is available (e.g.
1731                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1732                 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1733                 if not video_info and args.get('ypc_vid'):
1734                     return self.url_result(
1735                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1736                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1737                     is_live = True
1738                 sts = ytplayer_config.get('sts')
1739                 if not player_response:
1740                     pl_response = str_or_none(args.get('player_response'))
1741                     if pl_response:
1742                         pl_response = self._parse_json(pl_response, video_id, fatal=False)
1743                         if isinstance(pl_response, dict):
1744                             player_response = pl_response
1745             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1746                 add_dash_mpd_pr(player_response)
1747                 # We also try looking in get_video_info since it may contain different dashmpd
1748                 # URL that points to a DASH manifest with possibly different itag set (some itags
1749                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1750                 # manifest pointed by get_video_info's dashmpd).
1751                 # The general idea is to take a union of itags of both DASH manifests (for example
1752                 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1753                 self.report_video_info_webpage_download(video_id)
1754                 for el in ('embedded', 'detailpage', 'vevo', ''):
1755                     query = {
1756                         'video_id': video_id,
1757                         'ps': 'default',
1758                         'eurl': '',
1759                         'gl': 'US',
1760                         'hl': 'en',
1761                     }
1762                     if el:
1763                         query['el'] = el
1764                     if sts:
1765                         query['sts'] = sts
1766                     video_info_webpage = self._download_webpage(
1767                         '%s://www.youtube.com/get_video_info' % proto,
1768                         video_id, note=False,
1769                         errnote='unable to download video info webpage',
1770                         fatal=False, query=query)
1771                     if not video_info_webpage:
1772                         continue
1773                     get_video_info = compat_parse_qs(video_info_webpage)
1774                     if not player_response:
1775                         pl_response = get_video_info.get('player_response', [None])[0]
1776                         if isinstance(pl_response, dict):
1777                             player_response = pl_response
1778                             add_dash_mpd_pr(player_response)
1779                     add_dash_mpd(get_video_info)
1780                     if view_count is None:
1781                         view_count = extract_view_count(get_video_info)
1782                     if not video_info:
1783                         video_info = get_video_info
1784                     get_token = extract_token(get_video_info)
1785                     if get_token:
1786                         # Different get_video_info requests may report different results, e.g.
1787                         # some may report video unavailability, but some may serve it without
1788                         # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1789                         # the original webpage as well as el=info and el=embedded get_video_info
1790                         # requests report video unavailability due to geo restriction while
1791                         # el=detailpage succeeds and returns valid data). This is probably
1792                         # due to YouTube measures against IP ranges of hosting providers.
1793                         # Working around by preferring the first succeeded video_info containing
1794                         # the token if no such video_info yet was found.
1795                         token = extract_token(video_info)
1796                         if not token:
1797                             video_info = get_video_info
1798                         break
1799
1800         def extract_unavailable_message():
1801             return self._html_search_regex(
1802                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1803                 video_webpage, 'unavailable message', default=None)
1804
1805         if not video_info:
1806             unavailable_message = extract_unavailable_message()
1807             if not unavailable_message:
1808                 unavailable_message = 'Unable to extract video data'
1809             raise ExtractorError(
1810                 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1811
1812         video_details = try_get(
1813             player_response, lambda x: x['videoDetails'], dict) or {}
1814
1815         # title
1816         if 'title' in video_info:
1817             video_title = video_info['title'][0]
1818         elif 'title' in player_response:
1819             video_title = video_details['title']
1820         else:
1821             self._downloader.report_warning('Unable to extract video title')
1822             video_title = '_'
1823
1824         # description
1825         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1826         if video_description:
1827
1828             def replace_url(m):
1829                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1830                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1831                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1832                     qs = compat_parse_qs(parsed_redir_url.query)
1833                     q = qs.get('q')
1834                     if q and q[0]:
1835                         return q[0]
1836                 return redir_url
1837
1838             description_original = video_description = re.sub(r'''(?x)
1839                 <a\s+
1840                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1841                     (?:title|href)="([^"]+)"\s+
1842                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1843                     class="[^"]*"[^>]*>
1844                 [^<]+\.{3}\s*
1845                 </a>
1846             ''', replace_url, video_description)
1847             video_description = clean_html(video_description)
1848         else:
1849             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1850             if fd_mobj:
1851                 video_description = unescapeHTML(fd_mobj.group(1))
1852             else:
1853                 video_description = ''
1854
1855         if not smuggled_data.get('force_singlefeed', False):
1856             if not self._downloader.params.get('noplaylist'):
1857                 multifeed_metadata_list = try_get(
1858                     player_response,
1859                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1860                     compat_str) or try_get(
1861                     video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1862                 if multifeed_metadata_list:
1863                     entries = []
1864                     feed_ids = []
1865                     for feed in multifeed_metadata_list.split(','):
1866                         # Unquote should take place before split on comma (,) since textual
1867                         # fields may contain comma as well (see
1868                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
1869                         feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1870                         entries.append({
1871                             '_type': 'url_transparent',
1872                             'ie_key': 'Youtube',
1873                             'url': smuggle_url(
1874                                 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1875                                 {'force_singlefeed': True}),
1876                             'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1877                         })
1878                         feed_ids.append(feed_data['id'][0])
1879                     self.to_screen(
1880                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1881                         % (', '.join(feed_ids), video_id))
1882                     return self.playlist_result(entries, video_id, video_title, video_description)
1883             else:
1884                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1885
1886         if view_count is None:
1887             view_count = extract_view_count(video_info)
1888         if view_count is None and video_details:
1889             view_count = int_or_none(video_details.get('viewCount'))
1890
1891         # Check for "rental" videos
1892         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1893             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1894
1895         def _extract_filesize(media_url):
1896             return int_or_none(self._search_regex(
1897                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1898
1899         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1900             self.report_rtmp_download()
1901             formats = [{
1902                 'format_id': '_rtmp',
1903                 'protocol': 'rtmp',
1904                 'url': video_info['conn'][0],
1905                 'player_url': player_url,
1906             }]
1907         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1908             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1909             if 'rtmpe%3Dyes' in encoded_url_map:
1910                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1911             formats_spec = {}
1912             fmt_list = video_info.get('fmt_list', [''])[0]
1913             if fmt_list:
1914                 for fmt in fmt_list.split(','):
1915                     spec = fmt.split('/')
1916                     if len(spec) > 1:
1917                         width_height = spec[1].split('x')
1918                         if len(width_height) == 2:
1919                             formats_spec[spec[0]] = {
1920                                 'resolution': spec[1],
1921                                 'width': int_or_none(width_height[0]),
1922                                 'height': int_or_none(width_height[1]),
1923                             }
1924             q = qualities(['small', 'medium', 'hd720'])
1925             streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1926             if streaming_formats:
1927                 for fmt in streaming_formats:
1928                     itag = str_or_none(fmt.get('itag'))
1929                     if not itag:
1930                         continue
1931                     quality = fmt.get('quality')
1932                     quality_label = fmt.get('qualityLabel') or quality
1933                     formats_spec[itag] = {
1934                         'asr': int_or_none(fmt.get('audioSampleRate')),
1935                         'filesize': int_or_none(fmt.get('contentLength')),
1936                         'format_note': quality_label,
1937                         'fps': int_or_none(fmt.get('fps')),
1938                         'height': int_or_none(fmt.get('height')),
1939                         'quality': q(quality),
1940                         # bitrate for itag 43 is always 2147483647
1941                         'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1942                         'width': int_or_none(fmt.get('width')),
1943                     }
1944             formats = []
1945             for url_data_str in encoded_url_map.split(','):
1946                 url_data = compat_parse_qs(url_data_str)
1947                 if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
1948                     continue
1949                 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1950                 # Unsupported FORMAT_STREAM_TYPE_OTF
1951                 if stream_type == 3:
1952                     continue
1953                 format_id = url_data['itag'][0]
1954                 url = url_data['url'][0]
1955
1956                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1957                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1958                     jsplayer_url_json = self._search_regex(
1959                         ASSETS_RE,
1960                         embed_webpage if age_gate else video_webpage,
1961                         'JS player URL (1)', default=None)
1962                     if not jsplayer_url_json and not age_gate:
1963                         # We need the embed website after all
1964                         if embed_webpage is None:
1965                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1966                             embed_webpage = self._download_webpage(
1967                                 embed_url, video_id, 'Downloading embed webpage')
1968                         jsplayer_url_json = self._search_regex(
1969                             ASSETS_RE, embed_webpage, 'JS player URL')
1970
1971                     player_url = json.loads(jsplayer_url_json)
1972                     if player_url is None:
1973                         player_url_json = self._search_regex(
1974                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1975                             video_webpage, 'age gate player URL')
1976                         player_url = json.loads(player_url_json)
1977
1978                 if 'sig' in url_data:
1979                     url += '&signature=' + url_data['sig'][0]
1980                 elif 's' in url_data:
1981                     encrypted_sig = url_data['s'][0]
1982
1983                     if self._downloader.params.get('verbose'):
1984                         if player_url is None:
1985                             player_version = 'unknown'
1986                             player_desc = 'unknown'
1987                         else:
1988                             if player_url.endswith('swf'):
1989                                 player_version = self._search_regex(
1990                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1991                                     'flash player', fatal=False)
1992                                 player_desc = 'flash player %s' % player_version
1993                             else:
1994                                 player_version = self._search_regex(
1995                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1996                                      r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
1997                                     player_url,
1998                                     'html5 player', fatal=False)
1999                                 player_desc = 'html5 player %s' % player_version
2000
2001                         parts_sizes = self._signature_cache_id(encrypted_sig)
2002                         self.to_screen('{%s} signature length %s, %s' %
2003                                        (format_id, parts_sizes, player_desc))
2004
2005                     signature = self._decrypt_signature(
2006                         encrypted_sig, video_id, player_url, age_gate)
2007                     sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2008                     url += '&%s=%s' % (sp, signature)
2009                 if 'ratebypass' not in url:
2010                     url += '&ratebypass=yes'
2011
2012                 dct = {
2013                     'format_id': format_id,
2014                     'url': url,
2015                     'player_url': player_url,
2016                 }
2017                 if format_id in self._formats:
2018                     dct.update(self._formats[format_id])
2019                 if format_id in formats_spec:
2020                     dct.update(formats_spec[format_id])
2021
2022                 # Some itags are not included in DASH manifest thus corresponding formats will
2023                 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2024                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2025                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2026                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2027
2028                 filesize = int_or_none(url_data.get(
2029                     'clen', [None])[0]) or _extract_filesize(url)
2030
2031                 quality = url_data.get('quality', [None])[0]
2032
2033                 more_fields = {
2034                     'filesize': filesize,
2035                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
2036                     'width': width,
2037                     'height': height,
2038                     'fps': int_or_none(url_data.get('fps', [None])[0]),
2039                     'format_note': url_data.get('quality_label', [None])[0] or quality,
2040                     'quality': q(quality),
2041                 }
2042                 for key, value in more_fields.items():
2043                     if value:
2044                         dct[key] = value
2045                 type_ = url_data.get('type', [None])[0]
2046                 if type_:
2047                     type_split = type_.split(';')
2048                     kind_ext = type_split[0].split('/')
2049                     if len(kind_ext) == 2:
2050                         kind, _ = kind_ext
2051                         dct['ext'] = mimetype2ext(type_split[0])
2052                         if kind in ('audio', 'video'):
2053                             codecs = None
2054                             for mobj in re.finditer(
2055                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2056                                 if mobj.group('key') == 'codecs':
2057                                     codecs = mobj.group('val')
2058                                     break
2059                             if codecs:
2060                                 dct.update(parse_codecs(codecs))
2061                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2062                     dct['downloader_options'] = {
2063                         # Youtube throttles chunks >~10M
2064                         'http_chunk_size': 10485760,
2065                     }
2066                 formats.append(dct)
2067         else:
2068             manifest_url = (
2069                 url_or_none(try_get(
2070                     player_response,
2071                     lambda x: x['streamingData']['hlsManifestUrl'],
2072                     compat_str))
2073                 or url_or_none(try_get(
2074                     video_info, lambda x: x['hlsvp'][0], compat_str)))
2075             if manifest_url:
2076                 formats = []
2077                 m3u8_formats = self._extract_m3u8_formats(
2078                     manifest_url, video_id, 'mp4', fatal=False)
2079                 for a_format in m3u8_formats:
2080                     itag = self._search_regex(
2081                         r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2082                     if itag:
2083                         a_format['format_id'] = itag
2084                         if itag in self._formats:
2085                             dct = self._formats[itag].copy()
2086                             dct.update(a_format)
2087                             a_format = dct
2088                     a_format['player_url'] = player_url
2089                     # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2090                     a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2091                     formats.append(a_format)
2092             else:
2093                 error_message = clean_html(video_info.get('reason', [None])[0])
2094                 if not error_message:
2095                     error_message = extract_unavailable_message()
2096                 if error_message:
2097                     raise ExtractorError(error_message, expected=True)
2098                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2099
2100         # uploader
2101         video_uploader = try_get(
2102             video_info, lambda x: x['author'][0],
2103             compat_str) or str_or_none(video_details.get('author'))
2104         if video_uploader:
2105             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2106         else:
2107             self._downloader.report_warning('unable to extract uploader name')
2108
2109         # uploader_id
2110         video_uploader_id = None
2111         video_uploader_url = None
2112         mobj = re.search(
2113             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2114             video_webpage)
2115         if mobj is not None:
2116             video_uploader_id = mobj.group('uploader_id')
2117             video_uploader_url = mobj.group('uploader_url')
2118         else:
2119             self._downloader.report_warning('unable to extract uploader nickname')
2120
2121         channel_id = (
2122             str_or_none(video_details.get('channelId'))
2123             or self._html_search_meta(
2124                 'channelId', video_webpage, 'channel id', default=None)
2125             or self._search_regex(
2126                 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2127                 video_webpage, 'channel id', default=None, group='id'))
2128         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2129
2130         # thumbnail image
2131         # We try first to get a high quality image:
2132         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2133                             video_webpage, re.DOTALL)
2134         if m_thumb is not None:
2135             video_thumbnail = m_thumb.group(1)
2136         elif 'thumbnail_url' not in video_info:
2137             self._downloader.report_warning('unable to extract video thumbnail')
2138             video_thumbnail = None
2139         else:   # don't panic if we can't find it
2140             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2141
2142         # upload date
2143         upload_date = self._html_search_meta(
2144             'datePublished', video_webpage, 'upload date', default=None)
2145         if not upload_date:
2146             upload_date = self._search_regex(
2147                 [r'(?s)id="eow-date.*?>(.*?)</span>',
2148                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2149                 video_webpage, 'upload date', default=None)
2150         upload_date = unified_strdate(upload_date)
2151
2152         video_license = self._html_search_regex(
2153             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2154             video_webpage, 'license', default=None)
2155
2156         m_music = re.search(
2157             r'''(?x)
2158                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2159                 <ul[^>]*>\s*
2160                 <li>(?P<title>.+?)
2161                 by (?P<creator>.+?)
2162                 (?:
2163                     \(.+?\)|
2164                     <a[^>]*
2165                         (?:
2166                             \bhref=["\']/red[^>]*>|             # drop possible
2167                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
2168                         )
2169                     .*?
2170                 )?</li
2171             ''',
2172             video_webpage)
2173         if m_music:
2174             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2175             video_creator = clean_html(m_music.group('creator'))
2176         else:
2177             video_alt_title = video_creator = None
2178
2179         def extract_meta(field):
2180             return self._html_search_regex(
2181                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2182                 video_webpage, field, default=None)
2183
2184         track = extract_meta('Song')
2185         artist = extract_meta('Artist')
2186         album = extract_meta('Album')
2187
2188         # Youtube Music Auto-generated description
2189         release_date = release_year = None
2190         if video_description:
2191             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2192             if mobj:
2193                 if not track:
2194                     track = mobj.group('track').strip()
2195                 if not artist:
2196                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2197                 if not album:
2198                     album = mobj.group('album'.strip())
2199                 release_year = mobj.group('release_year')
2200                 release_date = mobj.group('release_date')
2201                 if release_date:
2202                     release_date = release_date.replace('-', '')
2203                     if not release_year:
2204                         release_year = int(release_date[:4])
2205                 if release_year:
2206                     release_year = int(release_year)
2207
2208         m_episode = re.search(
2209             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2210             video_webpage)
2211         if m_episode:
2212             series = unescapeHTML(m_episode.group('series'))
2213             season_number = int(m_episode.group('season'))
2214             episode_number = int(m_episode.group('episode'))
2215         else:
2216             series = season_number = episode_number = None
2217
2218         m_cat_container = self._search_regex(
2219             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2220             video_webpage, 'categories', default=None)
2221         if m_cat_container:
2222             category = self._html_search_regex(
2223                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2224                 default=None)
2225             video_categories = None if category is None else [category]
2226         else:
2227             video_categories = None
2228
2229         video_tags = [
2230             unescapeHTML(m.group('content'))
2231             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2232
2233         def _extract_count(count_name):
2234             return str_to_int(self._search_regex(
2235                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2236                 % re.escape(count_name),
2237                 video_webpage, count_name, default=None))
2238
2239         like_count = _extract_count('like')
2240         dislike_count = _extract_count('dislike')
2241
2242         if view_count is None:
2243             view_count = str_to_int(self._search_regex(
2244                 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2245                 'view count', default=None))
2246
2247         average_rating = (
2248             float_or_none(video_details.get('averageRating'))
2249             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2250
2251         # subtitles
2252         video_subtitles = self.extract_subtitles(video_id, video_webpage)
2253         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2254
2255         video_duration = try_get(
2256             video_info, lambda x: int_or_none(x['length_seconds'][0]))
2257         if not video_duration:
2258             video_duration = int_or_none(video_details.get('lengthSeconds'))
2259         if not video_duration:
2260             video_duration = parse_duration(self._html_search_meta(
2261                 'duration', video_webpage, 'video duration'))
2262
2263         # annotations
2264         video_annotations = None
2265         if self._downloader.params.get('writeannotations', False):
2266             video_annotations = self._extract_annotations(video_id)
2267
2268         chapters = self._extract_chapters(description_original, video_duration)
2269
2270         # Look for the DASH manifest
2271         if self._downloader.params.get('youtube_include_dash_manifest', True):
2272             dash_mpd_fatal = True
2273             for mpd_url in dash_mpds:
2274                 dash_formats = {}
2275                 try:
2276                     def decrypt_sig(mobj):
2277                         s = mobj.group(1)
2278                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2279                         return '/signature/%s' % dec_s
2280
2281                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2282
2283                     for df in self._extract_mpd_formats(
2284                             mpd_url, video_id, fatal=dash_mpd_fatal,
2285                             formats_dict=self._formats):
2286                         if not df.get('filesize'):
2287                             df['filesize'] = _extract_filesize(df['url'])
2288                         # Do not overwrite DASH format found in some previous DASH manifest
2289                         if df['format_id'] not in dash_formats:
2290                             dash_formats[df['format_id']] = df
2291                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2292                         # allow them to fail without bug report message if we already have
2293                         # some DASH manifest succeeded. This is temporary workaround to reduce
2294                         # burst of bug reports until we figure out the reason and whether it
2295                         # can be fixed at all.
2296                         dash_mpd_fatal = False
2297                 except (ExtractorError, KeyError) as e:
2298                     self.report_warning(
2299                         'Skipping DASH manifest: %r' % e, video_id)
2300                 if dash_formats:
2301                     # Remove the formats we found through non-DASH, they
2302                     # contain less info and it can be wrong, because we use
2303                     # fixed values (for example the resolution). See
2304                     # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2305                     # example.
2306                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2307                     formats.extend(dash_formats.values())
2308
2309         # Check for malformed aspect ratio
2310         stretched_m = re.search(
2311             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2312             video_webpage)
2313         if stretched_m:
2314             w = float(stretched_m.group('w'))
2315             h = float(stretched_m.group('h'))
2316             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2317             # We will only process correct ratios.
2318             if w > 0 and h > 0:
2319                 ratio = w / h
2320                 for f in formats:
2321                     if f.get('vcodec') != 'none':
2322                         f['stretched_ratio'] = ratio
2323
2324         if not formats:
2325             token = extract_token(video_info)
2326             if not token:
2327                 if 'reason' in video_info:
2328                     if 'The uploader has not made this video available in your country.' in video_info['reason']:
2329                         regions_allowed = self._html_search_meta(
2330                             'regionsAllowed', video_webpage, default=None)
2331                         countries = regions_allowed.split(',') if regions_allowed else None
2332                         self.raise_geo_restricted(
2333                             msg=video_info['reason'][0], countries=countries)
2334                     reason = video_info['reason'][0]
2335                     if 'Invalid parameters' in reason:
2336                         unavailable_message = extract_unavailable_message()
2337                         if unavailable_message:
2338                             reason = unavailable_message
2339                     raise ExtractorError(
2340                         'YouTube said: %s' % reason,
2341                         expected=True, video_id=video_id)
2342                 else:
2343                     raise ExtractorError(
2344                         '"token" parameter not in video info for unknown reason',
2345                         video_id=video_id)
2346
2347         if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2348             raise ExtractorError('This video is DRM protected.', expected=True)
2349
2350         self._sort_formats(formats)
2351
2352         self.mark_watched(video_id, video_info, player_response)
2353
2354         return {
2355             'id': video_id,
2356             'uploader': video_uploader,
2357             'uploader_id': video_uploader_id,
2358             'uploader_url': video_uploader_url,
2359             'channel_id': channel_id,
2360             'channel_url': channel_url,
2361             'upload_date': upload_date,
2362             'license': video_license,
2363             'creator': video_creator or artist,
2364             'title': video_title,
2365             'alt_title': video_alt_title or track,
2366             'thumbnail': video_thumbnail,
2367             'description': video_description,
2368             'categories': video_categories,
2369             'tags': video_tags,
2370             'subtitles': video_subtitles,
2371             'automatic_captions': automatic_captions,
2372             'duration': video_duration,
2373             'age_limit': 18 if age_gate else 0,
2374             'annotations': video_annotations,
2375             'chapters': chapters,
2376             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2377             'view_count': view_count,
2378             'like_count': like_count,
2379             'dislike_count': dislike_count,
2380             'average_rating': average_rating,
2381             'formats': formats,
2382             'is_live': is_live,
2383             'start_time': start_time,
2384             'end_time': end_time,
2385             'series': series,
2386             'season_number': season_number,
2387             'episode_number': episode_number,
2388             'track': track,
2389             'artist': artist,
2390             'album': album,
2391             'release_date': release_date,
2392             'release_year': release_year,
2393         }
2394
2395
2396 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2397     IE_DESC = 'YouTube.com playlists'
2398     _VALID_URL = r"""(?x)(?:
2399                         (?:https?://)?
2400                         (?:\w+\.)?
2401                         (?:
2402                             (?:
2403                                 youtube\.com|
2404                                 invidio\.us
2405                             )
2406                             /
2407                             (?:
2408                                (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2409                                \? (?:.*?[&;])*? (?:p|a|list)=
2410                             |  p/
2411                             )|
2412                             youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2413                         )
2414                         (
2415                             (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2416                             # Top tracks, they can also include dots
2417                             |(?:MC)[\w\.]*
2418                         )
2419                         .*
2420                      |
2421                         (%(playlist_id)s)
2422                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2423     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2424     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2425     IE_NAME = 'youtube:playlist'
2426     _TESTS = [{
2427         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2428         'info_dict': {
2429             'title': 'ytdl test PL',
2430             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2431         },
2432         'playlist_count': 3,
2433     }, {
2434         'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2435         'info_dict': {
2436             'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2437             'title': 'YDL_Empty_List',
2438         },
2439         'playlist_count': 0,
2440         'skip': 'This playlist is private',
2441     }, {
2442         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2443         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2444         'info_dict': {
2445             'title': '29C3: Not my department',
2446             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2447         },
2448         'playlist_count': 95,
2449     }, {
2450         'note': 'issue #673',
2451         'url': 'PLBB231211A4F62143',
2452         'info_dict': {
2453             'title': '[OLD]Team Fortress 2 (Class-based LP)',
2454             'id': 'PLBB231211A4F62143',
2455         },
2456         'playlist_mincount': 26,
2457     }, {
2458         'note': 'Large playlist',
2459         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2460         'info_dict': {
2461             'title': 'Uploads from Cauchemar',
2462             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2463         },
2464         'playlist_mincount': 799,
2465     }, {
2466         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2467         'info_dict': {
2468             'title': 'YDL_safe_search',
2469             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2470         },
2471         'playlist_count': 2,
2472         'skip': 'This playlist is private',
2473     }, {
2474         'note': 'embedded',
2475         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2476         'playlist_count': 4,
2477         'info_dict': {
2478             'title': 'JODA15',
2479             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2480         }
2481     }, {
2482         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2483         'playlist_mincount': 485,
2484         'info_dict': {
2485             'title': '2017 華語最新單曲 (2/24更新)',
2486             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2487         }
2488     }, {
2489         'note': 'Embedded SWF player',
2490         'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2491         'playlist_count': 4,
2492         'info_dict': {
2493             'title': 'JODA7',
2494             'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2495         }
2496     }, {
2497         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2498         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2499         'info_dict': {
2500             'title': 'Uploads from Interstellar Movie',
2501             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2502         },
2503         'playlist_mincount': 21,
2504     }, {
2505         # Playlist URL that does not actually serve a playlist
2506         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2507         'info_dict': {
2508             'id': 'FqZTN594JQw',
2509             'ext': 'webm',
2510             'title': "Smiley's People 01 detective, Adventure Series, Action",
2511             'uploader': 'STREEM',
2512             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2513             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2514             'upload_date': '20150526',
2515             'license': 'Standard YouTube License',
2516             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2517             'categories': ['People & Blogs'],
2518             'tags': list,
2519             'view_count': int,
2520             'like_count': int,
2521             'dislike_count': int,
2522         },
2523         'params': {
2524             'skip_download': True,
2525         },
2526         'add_ie': [YoutubeIE.ie_key()],
2527     }, {
2528         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2529         'info_dict': {
2530             'id': 'yeWKywCrFtk',
2531             'ext': 'mp4',
2532             'title': 'Small Scale Baler and Braiding Rugs',
2533             'uploader': 'Backus-Page House Museum',
2534             'uploader_id': 'backuspagemuseum',
2535             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2536             'upload_date': '20161008',
2537             'license': 'Standard YouTube License',
2538             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2539             'categories': ['Nonprofits & Activism'],
2540             'tags': list,
2541             'like_count': int,
2542             'dislike_count': int,
2543         },
2544         'params': {
2545             'noplaylist': True,
2546             'skip_download': True,
2547         },
2548     }, {
2549         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2550         'only_matching': True,
2551     }, {
2552         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2553         'only_matching': True,
2554     }, {
2555         # music album playlist
2556         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2557         'only_matching': True,
2558     }, {
2559         'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2560         'only_matching': True,
2561     }]
2562
2563     def _real_initialize(self):
2564         self._login()
2565
2566     def _extract_mix(self, playlist_id):
2567         # The mixes are generated from a single video
2568         # the id of the playlist is just 'RD' + video_id
2569         ids = []
2570         last_id = playlist_id[-11:]
2571         for n in itertools.count(1):
2572             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2573             webpage = self._download_webpage(
2574                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2575             new_ids = orderedSet(re.findall(
2576                 r'''(?xs)data-video-username=".*?".*?
2577                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2578                 webpage))
2579             # Fetch new pages until all the videos are repeated, it seems that
2580             # there are always 51 unique videos.
2581             new_ids = [_id for _id in new_ids if _id not in ids]
2582             if not new_ids:
2583                 break
2584             ids.extend(new_ids)
2585             last_id = ids[-1]
2586
2587         url_results = self._ids_to_results(ids)
2588
2589         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2590         title_span = (
2591             search_title('playlist-title')
2592             or search_title('title long-title')
2593             or search_title('title'))
2594         title = clean_html(title_span)
2595
2596         return self.playlist_result(url_results, playlist_id, title)
2597
2598     def _extract_playlist(self, playlist_id):
2599         url = self._TEMPLATE_URL % playlist_id
2600         page = self._download_webpage(url, playlist_id)
2601
2602         # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
2603         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2604             match = match.strip()
2605             # Check if the playlist exists or is private
2606             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2607             if mobj:
2608                 reason = mobj.group('reason')
2609                 message = 'This playlist %s' % reason
2610                 if 'private' in reason:
2611                     message += ', use --username or --netrc to access it'
2612                 message += '.'
2613                 raise ExtractorError(message, expected=True)
2614             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2615                 raise ExtractorError(
2616                     'Invalid parameters. Maybe URL is incorrect.',
2617                     expected=True)
2618             elif re.match(r'[^<]*Choose your language[^<]*', match):
2619                 continue
2620             else:
2621                 self.report_warning('Youtube gives an alert message: ' + match)
2622
2623         playlist_title = self._html_search_regex(
2624             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2625             page, 'title', default=None)
2626
2627         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2628         uploader = self._search_regex(
2629             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2630             page, 'uploader', default=None)
2631         mobj = re.search(
2632             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2633             page)
2634         if mobj:
2635             uploader_id = mobj.group('uploader_id')
2636             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2637         else:
2638             uploader_id = uploader_url = None
2639
2640         has_videos = True
2641
2642         if not playlist_title:
2643             try:
2644                 # Some playlist URLs don't actually serve a playlist (e.g.
2645                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2646                 next(self._entries(page, playlist_id))
2647             except StopIteration:
2648                 has_videos = False
2649
2650         playlist = self.playlist_result(
2651             self._entries(page, playlist_id), playlist_id, playlist_title)
2652         playlist.update({
2653             'uploader': uploader,
2654             'uploader_id': uploader_id,
2655             'uploader_url': uploader_url,
2656         })
2657
2658         return has_videos, playlist
2659
2660     def _check_download_just_video(self, url, playlist_id):
2661         # Check if it's a video-specific URL
2662         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2663         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2664             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2665             'video id', default=None)
2666         if video_id:
2667             if self._downloader.params.get('noplaylist'):
2668                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2669                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2670             else:
2671                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2672                 return video_id, None
2673         return None, None
2674
2675     def _real_extract(self, url):
2676         # Extract playlist id
2677         mobj = re.match(self._VALID_URL, url)
2678         if mobj is None:
2679             raise ExtractorError('Invalid URL: %s' % url)
2680         playlist_id = mobj.group(1) or mobj.group(2)
2681
2682         video_id, video = self._check_download_just_video(url, playlist_id)
2683         if video:
2684             return video
2685
2686         if playlist_id.startswith(('RD', 'UL', 'PU')):
2687             # Mixes require a custom extraction process
2688             return self._extract_mix(playlist_id)
2689
2690         has_videos, playlist = self._extract_playlist(playlist_id)
2691         if has_videos or not video_id:
2692             return playlist
2693
2694         # Some playlist URLs don't actually serve a playlist (see
2695         # https://github.com/ytdl-org/youtube-dl/issues/10537).
2696         # Fallback to plain video extraction if there is a video id
2697         # along with playlist id.
2698         return self.url_result(video_id, 'Youtube', video_id=video_id)
2699
2700
2701 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2702     IE_DESC = 'YouTube.com channels'
2703     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2704     _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2705     _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2706     IE_NAME = 'youtube:channel'
2707     _TESTS = [{
2708         'note': 'paginated channel',
2709         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2710         'playlist_mincount': 91,
2711         'info_dict': {
2712             'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2713             'title': 'Uploads from lex will',
2714         }
2715     }, {
2716         'note': 'Age restricted channel',
2717         # from https://www.youtube.com/user/DeusExOfficial
2718         'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2719         'playlist_mincount': 64,
2720         'info_dict': {
2721             'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2722             'title': 'Uploads from Deus Ex',
2723         },
2724     }, {
2725         'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2726         'only_matching': True,
2727     }]
2728
2729     @classmethod
2730     def suitable(cls, url):
2731         return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2732                 else super(YoutubeChannelIE, cls).suitable(url))
2733
2734     def _build_template_url(self, url, channel_id):
2735         return self._TEMPLATE_URL % channel_id
2736
2737     def _real_extract(self, url):
2738         channel_id = self._match_id(url)
2739
2740         url = self._build_template_url(url, channel_id)
2741
2742         # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2743         # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2744         # otherwise fallback on channel by page extraction
2745         channel_page = self._download_webpage(
2746             url + '?view=57', channel_id,
2747             'Downloading channel page', fatal=False)
2748         if channel_page is False:
2749             channel_playlist_id = False
2750         else:
2751             channel_playlist_id = self._html_search_meta(
2752                 'channelId', channel_page, 'channel id', default=None)
2753             if not channel_playlist_id:
2754                 channel_url = self._html_search_meta(
2755                     ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2756                     channel_page, 'channel url', default=None)
2757                 if channel_url:
2758                     channel_playlist_id = self._search_regex(
2759                         r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2760                         channel_url, 'channel id', default=None)
2761         if channel_playlist_id and channel_playlist_id.startswith('UC'):
2762             playlist_id = 'UU' + channel_playlist_id[2:]
2763             return self.url_result(
2764                 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2765
2766         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2767         autogenerated = re.search(r'''(?x)
2768                 class="[^"]*?(?:
2769                     channel-header-autogenerated-label|
2770                     yt-channel-title-autogenerated
2771                 )[^"]*"''', channel_page) is not None
2772
2773         if autogenerated:
2774             # The videos are contained in a single page
2775             # the ajax pages can't be used, they are empty
2776             entries = [
2777                 self.url_result(
2778                     video_id, 'Youtube', video_id=video_id,
2779                     video_title=video_title)
2780                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2781             return self.playlist_result(entries, channel_id)
2782
2783         try:
2784             next(self._entries(channel_page, channel_id))
2785         except StopIteration:
2786             alert_message = self._html_search_regex(
2787                 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2788                 channel_page, 'alert', default=None, group='alert')
2789             if alert_message:
2790                 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2791
2792         return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2793
2794
2795 class YoutubeUserIE(YoutubeChannelIE):
2796     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2797     _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
2798     _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2799     IE_NAME = 'youtube:user'
2800
2801     _TESTS = [{
2802         'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2803         'playlist_mincount': 320,
2804         'info_dict': {
2805             'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2806             'title': 'Uploads from The Linux Foundation',
2807         }
2808     }, {
2809         # Only available via https://www.youtube.com/c/12minuteathlete/videos
2810         # but not https://www.youtube.com/user/12minuteathlete/videos
2811         'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2812         'playlist_mincount': 249,
2813         'info_dict': {
2814             'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2815             'title': 'Uploads from 12 Minute Athlete',
2816         }
2817     }, {
2818         'url': 'ytuser:phihag',
2819         'only_matching': True,
2820     }, {
2821         'url': 'https://www.youtube.com/c/gametrailers',
2822         'only_matching': True,
2823     }, {
2824         'url': 'https://www.youtube.com/gametrailers',
2825         'only_matching': True,
2826     }, {
2827         # This channel is not available, geo restricted to JP
2828         'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2829         'only_matching': True,
2830     }]
2831
2832     @classmethod
2833     def suitable(cls, url):
2834         # Don't return True if the url can be extracted with other youtube
2835         # extractor, the regex would is too permissive and it would match.
2836         other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2837         if any(ie.suitable(url) for ie in other_yt_ies):
2838             return False
2839         else:
2840             return super(YoutubeUserIE, cls).suitable(url)
2841
2842     def _build_template_url(self, url, channel_id):
2843         mobj = re.match(self._VALID_URL, url)
2844         return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2845
2846
2847 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2848     IE_DESC = 'YouTube.com live streams'
2849     _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2850     IE_NAME = 'youtube:live'
2851
2852     _TESTS = [{
2853         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2854         'info_dict': {
2855             'id': 'a48o2S1cPoo',
2856             'ext': 'mp4',
2857             'title': 'The Young Turks - Live Main Show',
2858             'uploader': 'The Young Turks',
2859             'uploader_id': 'TheYoungTurks',
2860             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2861             'upload_date': '20150715',
2862             'license': 'Standard YouTube License',
2863             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2864             'categories': ['News & Politics'],
2865             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2866             'like_count': int,
2867             'dislike_count': int,
2868         },
2869         'params': {
2870             'skip_download': True,
2871         },
2872     }, {
2873         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2874         'only_matching': True,
2875     }, {
2876         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2877         'only_matching': True,
2878     }, {
2879         'url': 'https://www.youtube.com/TheYoungTurks/live',
2880         'only_matching': True,
2881     }]
2882
2883     def _real_extract(self, url):
2884         mobj = re.match(self._VALID_URL, url)
2885         channel_id = mobj.group('id')
2886         base_url = mobj.group('base_url')
2887         webpage = self._download_webpage(url, channel_id, fatal=False)
2888         if webpage:
2889             page_type = self._og_search_property(
2890                 'type', webpage, 'page type', default='')
2891             video_id = self._html_search_meta(
2892                 'videoId', webpage, 'video id', default=None)
2893             if page_type.startswith('video') and video_id and re.match(
2894                     r'^[0-9A-Za-z_-]{11}$', video_id):
2895                 return self.url_result(video_id, YoutubeIE.ie_key())
2896         return self.url_result(base_url)
2897
2898
2899 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
2900     IE_DESC = 'YouTube.com user/channel playlists'
2901     _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
2902     IE_NAME = 'youtube:playlists'
2903
2904     _TESTS = [{
2905         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2906         'playlist_mincount': 4,
2907         'info_dict': {
2908             'id': 'ThirstForScience',
2909             'title': 'Thirst for Science',
2910         },
2911     }, {
2912         # with "Load more" button
2913         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2914         'playlist_mincount': 70,
2915         'info_dict': {
2916             'id': 'igorkle1',
2917             'title': 'Игорь Клейнер',
2918         },
2919     }, {
2920         'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
2921         'playlist_mincount': 17,
2922         'info_dict': {
2923             'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
2924             'title': 'Chem Player',
2925         },
2926     }]
2927
2928
2929 class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
2930     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2931
2932
2933 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
2934     IE_DESC = 'YouTube.com searches'
2935     # there doesn't appear to be a real limit, for example if you search for
2936     # 'python' you get more than 8.000.000 results
2937     _MAX_RESULTS = float('inf')
2938     IE_NAME = 'youtube:search'
2939     _SEARCH_KEY = 'ytsearch'
2940     _EXTRA_QUERY_ARGS = {}
2941     _TESTS = []
2942
2943     def _get_n_results(self, query, n):
2944         """Get a specified number of results for a query"""
2945
2946         videos = []
2947         limit = n
2948
2949         url_query = {
2950             'search_query': query.encode('utf-8'),
2951         }
2952         url_query.update(self._EXTRA_QUERY_ARGS)
2953         result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
2954
2955         for pagenum in itertools.count(1):
2956             data = self._download_json(
2957                 result_url, video_id='query "%s"' % query,
2958                 note='Downloading page %s' % pagenum,
2959                 errnote='Unable to download API page',
2960                 query={'spf': 'navigate'})
2961             html_content = data[1]['body']['content']
2962
2963             if 'class="search-message' in html_content:
2964                 raise ExtractorError(
2965                     '[youtube] No video results', expected=True)
2966
2967             new_videos = list(self._process_page(html_content))
2968             videos += new_videos
2969             if not new_videos or len(videos) > limit:
2970                 break
2971             next_link = self._html_search_regex(
2972                 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
2973                 html_content, 'next link', default=None)
2974             if next_link is None:
2975                 break
2976             result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
2977
2978         if len(videos) > n:
2979             videos = videos[:n]
2980         return self.playlist_result(videos, query)
2981
2982
2983 class YoutubeSearchDateIE(YoutubeSearchIE):
2984     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
2985     _SEARCH_KEY = 'ytsearchdate'
2986     IE_DESC = 'YouTube.com searches, newest videos first'
2987     _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2988
2989
2990 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
2991     IE_DESC = 'YouTube.com search URLs'
2992     IE_NAME = 'youtube:search_url'
2993     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
2994     _TESTS = [{
2995         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
2996         'playlist_mincount': 5,
2997         'info_dict': {
2998             'title': 'youtube-dl test video',
2999         }
3000     }, {
3001         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
3002         'only_matching': True,
3003     }]
3004
3005     def _real_extract(self, url):
3006         mobj = re.match(self._VALID_URL, url)
3007         query = compat_urllib_parse_unquote_plus(mobj.group('query'))
3008         webpage = self._download_webpage(url, query)
3009         return self.playlist_result(self._process_page(webpage), playlist_title=query)
3010
3011
3012 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
3013     IE_DESC = 'YouTube.com (multi-season) shows'
3014     _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3015     IE_NAME = 'youtube:show'
3016     _TESTS = [{
3017         'url': 'https://www.youtube.com/show/airdisasters',
3018         'playlist_mincount': 5,
3019         'info_dict': {
3020             'id': 'airdisasters',
3021             'title': 'Air Disasters',
3022         }
3023     }]
3024
3025     def _real_extract(self, url):
3026         playlist_id = self._match_id(url)
3027         return super(YoutubeShowIE, self)._real_extract(
3028             'https://www.youtube.com/show/%s/playlists' % playlist_id)
3029
3030
3031 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
3032     """
3033     Base class for feed extractors
3034     Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
3035     """
3036     _LOGIN_REQUIRED = True
3037
3038     @property
3039     def IE_NAME(self):
3040         return 'youtube:%s' % self._FEED_NAME
3041
3042     def _real_initialize(self):
3043         self._login()
3044
3045     def _entries(self, page):
3046         # The extraction process is the same as for playlists, but the regex
3047         # for the video ids doesn't contain an index
3048         ids = []
3049         more_widget_html = content_html = page
3050         for page_num in itertools.count(1):
3051             matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
3052
3053             # 'recommended' feed has infinite 'load more' and each new portion spins
3054             # the same videos in (sometimes) slightly different order, so we'll check
3055             # for unicity and break when portion has no new videos
3056             new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
3057             if not new_ids:
3058                 break
3059
3060             ids.extend(new_ids)
3061
3062             for entry in self._ids_to_results(new_ids):
3063                 yield entry
3064
3065             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
3066             if not mobj:
3067                 break
3068
3069             more = self._download_json(
3070                 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
3071                 'Downloading page #%s' % page_num,
3072                 transform_source=uppercase_escape)
3073             content_html = more['content_html']
3074             more_widget_html = more['load_more_widget_html']
3075
3076     def _real_extract(self, url):
3077         page = self._download_webpage(
3078             'https://www.youtube.com/feed/%s' % self._FEED_NAME,
3079             self._PLAYLIST_TITLE)
3080         return self.playlist_result(
3081             self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3082
3083
3084 class YoutubeWatchLaterIE(YoutubePlaylistIE):
3085     IE_NAME = 'youtube:watchlater'
3086     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
3087     _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
3088
3089     _TESTS = [{
3090         'url': 'https://www.youtube.com/playlist?list=WL',
3091         'only_matching': True,
3092     }, {
3093         'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
3094         'only_matching': True,
3095     }]
3096
3097     def _real_extract(self, url):
3098         _, video = self._check_download_just_video(url, 'WL')
3099         if video:
3100             return video
3101         _, playlist = self._extract_playlist('WL')
3102         return playlist
3103
3104
3105 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
3106     IE_NAME = 'youtube:favorites'
3107     IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
3108     _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
3109     _LOGIN_REQUIRED = True
3110
3111     def _real_extract(self, url):
3112         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
3113         playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
3114         return self.url_result(playlist_id, 'YoutubePlaylist')
3115
3116
3117 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
3118     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
3119     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
3120     _FEED_NAME = 'recommended'
3121     _PLAYLIST_TITLE = 'Youtube Recommended videos'
3122
3123
3124 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
3125     IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
3126     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
3127     _FEED_NAME = 'subscriptions'
3128     _PLAYLIST_TITLE = 'Youtube Subscriptions'
3129
3130
3131 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
3132     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
3133     _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3134     _FEED_NAME = 'history'
3135     _PLAYLIST_TITLE = 'Youtube History'
3136
3137
3138 class YoutubeTruncatedURLIE(InfoExtractor):
3139     IE_NAME = 'youtube:truncated_url'
3140     IE_DESC = False  # Do not list
3141     _VALID_URL = r'''(?x)
3142         (?:https?://)?
3143         (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
3144         (?:watch\?(?:
3145             feature=[a-z_]+|
3146             annotation_id=annotation_[^&]+|
3147             x-yt-cl=[0-9]+|
3148             hl=[^&]*|
3149             t=[0-9]+
3150         )?
3151         |
3152             attribution_link\?a=[^&]+
3153         )
3154         $
3155     '''
3156
3157     _TESTS = [{
3158         'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
3159         'only_matching': True,
3160     }, {
3161         'url': 'https://www.youtube.com/watch?',
3162         'only_matching': True,
3163     }, {
3164         'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
3165         'only_matching': True,
3166     }, {
3167         'url': 'https://www.youtube.com/watch?feature=foo',
3168         'only_matching': True,
3169     }, {
3170         'url': 'https://www.youtube.com/watch?hl=en-GB',
3171         'only_matching': True,
3172     }, {
3173         'url': 'https://www.youtube.com/watch?t=2372',
3174         'only_matching': True,
3175     }]
3176
3177     def _real_extract(self, url):
3178         raise ExtractorError(
3179             'Did you forget to quote the URL? Remember that & is a meta '
3180             'character in most shells, so you want to put the URL in quotes, '
3181             'like  youtube-dl '
3182             '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
3183             ' or simply  youtube-dl BaW_jenozKc  .',
3184             expected=True)
3185
3186
3187 class YoutubeTruncatedIDIE(InfoExtractor):
3188     IE_NAME = 'youtube:truncated_id'
3189     IE_DESC = False  # Do not list
3190     _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
3191
3192     _TESTS = [{
3193         'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
3194         'only_matching': True,
3195     }]
3196
3197     def _real_extract(self, url):
3198         video_id = self._match_id(url)
3199         raise ExtractorError(
3200             'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
3201             expected=True)