Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import random
  10 import re
  11 import time
  12 import traceback
  13
  14 from .common import InfoExtractor, SearchInfoExtractor
  15 from ..jsinterp import JSInterpreter
  16 from ..swfinterp import SWFInterpreter
  17 from ..compat import (
  18     compat_chr,
  19     compat_kwargs,
  20     compat_parse_qs,
  21     compat_urllib_parse_unquote,
  22     compat_urllib_parse_unquote_plus,
  23     compat_urllib_parse_urlencode,
  24     compat_urllib_parse_urlparse,
  25     compat_urlparse,
  26     compat_str,
  27 )
  28 from ..utils import (
  29     clean_html,
  30     error_to_compat_str,
  31     ExtractorError,
  32     float_or_none,
  33     get_element_by_attribute,
  34     get_element_by_id,
  35     int_or_none,
  36     mimetype2ext,
  37     orderedSet,
  38     parse_codecs,
  39     parse_duration,
  40     qualities,
  41     remove_quotes,
  42     remove_start,
  43     smuggle_url,
  44     str_to_int,
  45     try_get,
  46     unescapeHTML,
  47     unified_strdate,
  48     unsmuggle_url,
  49     uppercase_escape,
  50     urlencode_postdata,
  51 )
  52
  53
  54 class YoutubeBaseInfoExtractor(InfoExtractor):
  55     """Provide base functions for Youtube extractors"""
  56     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  57     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  58
  59     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  60     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  61     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  62
  63     _NETRC_MACHINE = 'youtube'
  64     # If True it will raise an error if no login info is provided
  65     _LOGIN_REQUIRED = False
  66
  67     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
  68
  69     def _set_language(self):
  70         self._set_cookie(
  71             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  72             # YouTube sets the expire time to about two months
  73             expire_time=time.time() + 2 * 30 * 24 * 3600)
  74
  75     def _ids_to_results(self, ids):
  76         return [
  77             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  78             for vid_id in ids]
  79
  80     def _login(self):
  81         """
  82         Attempt to log in to YouTube.
  83         True is returned if successful or skipped.
  84         False is returned if login failed.
  85
  86         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  87         """
  88         username, password = self._get_login_info()
  89         # No authentication to be performed
  90         if username is None:
  91             if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
  92                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  93             return True
  94
  95         login_page = self._download_webpage(
  96             self._LOGIN_URL, None,
  97             note='Downloading login page',
  98             errnote='unable to fetch login page', fatal=False)
  99         if login_page is False:
 100             return
 101
 102         login_form = self._hidden_inputs(login_page)
 103
 104         def req(url, f_req, note, errnote):
 105             data = login_form.copy()
 106             data.update({
 107                 'pstMsg': 1,
 108                 'checkConnection': 'youtube',
 109                 'checkedDomains': 'youtube',
 110                 'hl': 'en',
 111                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 112                 'f.req': json.dumps(f_req),
 113                 'flowName': 'GlifWebSignIn',
 114                 'flowEntry': 'ServiceLogin',
 115             })
 116             return self._download_json(
 117                 url, None, note=note, errnote=errnote,
 118                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 119                 fatal=False,
 120                 data=urlencode_postdata(data), headers={
 121                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 122                     'Google-Accounts-XSRF': 1,
 123                 })
 124
 125         def warn(message):
 126             self._downloader.report_warning(message)
 127
 128         lookup_req = [
 129             username,
 130             None, [], None, 'US', None, None, 2, False, True,
 131             [
 132                 None, None,
 133                 [2, 1, None, 1,
 134                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 135                  None, [], 4],
 136                 1, [None, None, []], None, None, None, True
 137             ],
 138             username,
 139         ]
 140
 141         lookup_results = req(
 142             self._LOOKUP_URL, lookup_req,
 143             'Looking up account info', 'Unable to look up account info')
 144
 145         if lookup_results is False:
 146             return False
 147
 148         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 149         if not user_hash:
 150             warn('Unable to extract user hash')
 151             return False
 152
 153         challenge_req = [
 154             user_hash,
 155             None, 1, None, [1, None, None, None, [password, None, True]],
 156             [
 157                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 158                 1, [None, None, []], None, None, None, True
 159             ]]
 160
 161         challenge_results = req(
 162             self._CHALLENGE_URL, challenge_req,
 163             'Logging in', 'Unable to log in')
 164
 165         if challenge_results is False:
 166             return
 167
 168         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 169         if login_res:
 170             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 171             warn(
 172                 'Unable to login: %s' % 'Invalid password'
 173                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 174             return False
 175
 176         res = try_get(challenge_results, lambda x: x[0][-1], list)
 177         if not res:
 178             warn('Unable to extract result entry')
 179             return False
 180
 181         tfa = try_get(res, lambda x: x[0][0], list)
 182         if tfa:
 183             tfa_str = try_get(tfa, lambda x: x[2], compat_str)
 184             if tfa_str == 'TWO_STEP_VERIFICATION':
 185                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 186                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 187                 status = try_get(tfa, lambda x: x[5], compat_str)
 188                 if status == 'QUOTA_EXCEEDED':
 189                     warn('Exceeded the limit of TFA codes, try later')
 190                     return False
 191
 192                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 193                 if not tl:
 194                     warn('Unable to extract TL')
 195                     return False
 196
 197                 tfa_code = self._get_tfa_info('2-step verification code')
 198
 199                 if not tfa_code:
 200                     warn(
 201                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 202                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 203                     return False
 204
 205                 tfa_code = remove_start(tfa_code, 'G-')
 206
 207                 tfa_req = [
 208                     user_hash, None, 2, None,
 209                     [
 210                         9, None, None, None, None, None, None, None,
 211                         [None, tfa_code, True, 2]
 212                     ]]
 213
 214                 tfa_results = req(
 215                     self._TFA_URL.format(tl), tfa_req,
 216                     'Submitting TFA code', 'Unable to submit TFA code')
 217
 218                 if tfa_results is False:
 219                     return False
 220
 221                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 222                 if tfa_res:
 223                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 224                     warn(
 225                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 226                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 227                     return False
 228
 229                 check_cookie_url = try_get(
 230                     tfa_results, lambda x: x[0][-1][2], compat_str)
 231         else:
 232             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 233
 234         if not check_cookie_url:
 235             warn('Unable to extract CheckCookie URL')
 236             return False
 237
 238         check_cookie_results = self._download_webpage(
 239             check_cookie_url, None, 'Checking cookie', fatal=False)
 240
 241         if check_cookie_results is False:
 242             return False
 243
 244         if 'https://myaccount.google.com/' not in check_cookie_results:
 245             warn('Unable to log in')
 246             return False
 247
 248         return True
 249
 250     def _download_webpage_handle(self, *args, **kwargs):
 251         kwargs.setdefault('query', {})['disable_polymer'] = 'true'
 252         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
 253             *args, **compat_kwargs(kwargs))
 254
 255     def _real_initialize(self):
 256         if self._downloader is None:
 257             return
 258         self._set_language()
 259         if not self._login():
 260             return
 261
 262
 263 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 264     # Extract entries from page with "Load more" button
 265     def _entries(self, page, playlist_id):
 266         more_widget_html = content_html = page
 267         for page_num in itertools.count(1):
 268             for entry in self._process_page(content_html):
 269                 yield entry
 270
 271             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 272             if not mobj:
 273                 break
 274
 275             more = self._download_json(
 276                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 277                 'Downloading page #%s' % page_num,
 278                 transform_source=uppercase_escape)
 279             content_html = more['content_html']
 280             if not content_html.strip():
 281                 # Some webpages show a "Load more" button but they don't
 282                 # have more videos
 283                 break
 284             more_widget_html = more['load_more_widget_html']
 285
 286
 287 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 288     def _process_page(self, content):
 289         for video_id, video_title in self.extract_videos_from_page(content):
 290             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 291
 292     def extract_videos_from_page(self, page):
 293         ids_in_page = []
 294         titles_in_page = []
 295         for mobj in re.finditer(self._VIDEO_RE, page):
 296             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 297             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 298                 continue
 299             video_id = mobj.group('id')
 300             video_title = unescapeHTML(mobj.group('title'))
 301             if video_title:
 302                 video_title = video_title.strip()
 303             try:
 304                 idx = ids_in_page.index(video_id)
 305                 if video_title and not titles_in_page[idx]:
 306                     titles_in_page[idx] = video_title
 307             except ValueError:
 308                 ids_in_page.append(video_id)
 309                 titles_in_page.append(video_title)
 310         return zip(ids_in_page, titles_in_page)
 311
 312
 313 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 314     def _process_page(self, content):
 315         for playlist_id in orderedSet(re.findall(
 316                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
 317                 content)):
 318             yield self.url_result(
 319                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 320
 321     def _real_extract(self, url):
 322         playlist_id = self._match_id(url)
 323         webpage = self._download_webpage(url, playlist_id)
 324         title = self._og_search_title(webpage, fatal=False)
 325         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 326
 327
 328 class YoutubeIE(YoutubeBaseInfoExtractor):
 329     IE_DESC = 'YouTube.com'
 330     _VALID_URL = r"""(?x)^
 331                      (
 332                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 333                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 334                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 335                             (?:www\.)?pwnyoutube\.com/|
 336                             (?:www\.)?hooktube\.com/|
 337                             (?:www\.)?yourepeat\.com/|
 338                             tube\.majestyc\.net/|
 339                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 340                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 341                          (?:                                                  # the various things that can precede the ID:
 342                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 343                              |(?:                                             # or the v= param in all its forms
 344                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 345                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 346                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 347                                  v=
 348                              )
 349                          ))
 350                          |(?:
 351                             youtu\.be|                                        # just youtu.be/xxxx
 352                             vid\.plus|                                        # or vid.plus/xxxx
 353                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 354                          )/
 355                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 356                          )
 357                      )?                                                       # all until now is optional -> you can pass the naked ID
 358                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 359                      (?!.*?\blist=
 360                         (?:
 361                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
 362                             WL                                                # WL are handled by the watch later IE
 363                         )
 364                      )
 365                      (?(1).+)?                                                # if we found the ID, everything can follow
 366                      $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 367     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 368     _formats = {
 369         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 370         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 371         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 372         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 373         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 374         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 375         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 376         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 377         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 378         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 379         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 380         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 381         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 382         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 383         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 384         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 385         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 386         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 387
 388
 389         # 3D videos
 390         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 391         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 392         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 393         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 394         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 395         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 396         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 397
 398         # Apple HTTP Live Streaming
 399         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 400         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 401         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 402         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 403         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 404         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 405         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 406         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 407
 408         # DASH mp4 video
 409         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 410         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 411         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 412         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 413         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 414         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 415         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 416         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 417         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 418         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 419         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 420         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 421
 422         # Dash mp4 audio
 423         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 424         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 425         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 426         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 427         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 428         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 429         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 430
 431         # Dash webm
 432         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 433         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 434         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 435         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 436         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 437         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 438         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 439         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 440         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 441         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 442         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 443         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 444         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 445         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 446         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 447         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 448         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 449         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 450         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 451         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 452         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 453         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 454
 455         # Dash webm audio
 456         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 457         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 458
 459         # Dash webm audio with opus inside
 460         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 461         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 462         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 463
 464         # RTMP (unnamed)
 465         '_rtmp': {'protocol': 'rtmp'},
 466     }
 467     _SUBTITLE_FORMATS = ('ttml', 'vtt')
 468
 469     _GEO_BYPASS = False
 470
 471     IE_NAME = 'youtube'
 472     _TESTS = [
 473         {
 474             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 475             'info_dict': {
 476                 'id': 'BaW_jenozKc',
 477                 'ext': 'mp4',
 478                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 479                 'uploader': 'Philipp Hagemeister',
 480                 'uploader_id': 'phihag',
 481                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 482                 'upload_date': '20121002',
 483                 'license': 'Standard YouTube License',
 484                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 485                 'categories': ['Science & Technology'],
 486                 'tags': ['youtube-dl'],
 487                 'duration': 10,
 488                 'like_count': int,
 489                 'dislike_count': int,
 490                 'start_time': 1,
 491                 'end_time': 9,
 492             }
 493         },
 494         {
 495             'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
 496             'note': 'Test generic use_cipher_signature video (#897)',
 497             'info_dict': {
 498                 'id': 'UxxajLWwzqY',
 499                 'ext': 'mp4',
 500                 'upload_date': '20120506',
 501                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 502                 'alt_title': 'I Love It (feat. Charli XCX)',
 503                 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
 504                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 505                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 506                          'iconic ep', 'iconic', 'love', 'it'],
 507                 'duration': 180,
 508                 'uploader': 'Icona Pop',
 509                 'uploader_id': 'IconaPop',
 510                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
 511                 'license': 'Standard YouTube License',
 512                 'creator': 'Icona Pop',
 513                 'track': 'I Love It (feat. Charli XCX)',
 514                 'artist': 'Icona Pop',
 515             }
 516         },
 517         {
 518             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 519             'note': 'Test VEVO video with age protection (#956)',
 520             'info_dict': {
 521                 'id': '07FYdnEawAQ',
 522                 'ext': 'mp4',
 523                 'upload_date': '20130703',
 524                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 525                 'alt_title': 'Tunnel Vision',
 526                 'description': 'md5:64249768eec3bc4276236606ea996373',
 527                 'duration': 419,
 528                 'uploader': 'justintimberlakeVEVO',
 529                 'uploader_id': 'justintimberlakeVEVO',
 530                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
 531                 'license': 'Standard YouTube License',
 532                 'creator': 'Justin Timberlake',
 533                 'track': 'Tunnel Vision',
 534                 'artist': 'Justin Timberlake',
 535                 'age_limit': 18,
 536             }
 537         },
 538         {
 539             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 540             'note': 'Embed-only video (#1746)',
 541             'info_dict': {
 542                 'id': 'yZIXLfi8CZQ',
 543                 'ext': 'mp4',
 544                 'upload_date': '20120608',
 545                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 546                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 547                 'uploader': 'SET India',
 548                 'uploader_id': 'setindia',
 549                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 550                 'license': 'Standard YouTube License',
 551                 'age_limit': 18,
 552             }
 553         },
 554         {
 555             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
 556             'note': 'Use the first video ID in the URL',
 557             'info_dict': {
 558                 'id': 'BaW_jenozKc',
 559                 'ext': 'mp4',
 560                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 561                 'uploader': 'Philipp Hagemeister',
 562                 'uploader_id': 'phihag',
 563                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 564                 'upload_date': '20121002',
 565                 'license': 'Standard YouTube License',
 566                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 567                 'categories': ['Science & Technology'],
 568                 'tags': ['youtube-dl'],
 569                 'duration': 10,
 570                 'like_count': int,
 571                 'dislike_count': int,
 572             },
 573             'params': {
 574                 'skip_download': True,
 575             },
 576         },
 577         {
 578             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 579             'note': '256k DASH audio (format 141) via DASH manifest',
 580             'info_dict': {
 581                 'id': 'a9LDPn-MO4I',
 582                 'ext': 'm4a',
 583                 'upload_date': '20121002',
 584                 'uploader_id': '8KVIDEO',
 585                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
 586                 'description': '',
 587                 'uploader': '8KVIDEO',
 588                 'license': 'Standard YouTube License',
 589                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 590             },
 591             'params': {
 592                 'youtube_include_dash_manifest': True,
 593                 'format': '141',
 594             },
 595             'skip': 'format 141 not served anymore',
 596         },
 597         # DASH manifest with encrypted signature
 598         {
 599             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 600             'info_dict': {
 601                 'id': 'IB3lcPjvWLA',
 602                 'ext': 'm4a',
 603                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 604                 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
 605                 'duration': 244,
 606                 'uploader': 'AfrojackVEVO',
 607                 'uploader_id': 'AfrojackVEVO',
 608                 'upload_date': '20131011',
 609                 'license': 'Standard YouTube License',
 610             },
 611             'params': {
 612                 'youtube_include_dash_manifest': True,
 613                 'format': '141/bestaudio[ext=m4a]',
 614             },
 615         },
 616         # JS player signature function name containing $
 617         {
 618             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 619             'info_dict': {
 620                 'id': 'nfWlot6h_JM',
 621                 'ext': 'm4a',
 622                 'title': 'Taylor Swift - Shake It Off',
 623                 'alt_title': 'Shake It Off',
 624                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 625                 'duration': 242,
 626                 'uploader': 'TaylorSwiftVEVO',
 627                 'uploader_id': 'TaylorSwiftVEVO',
 628                 'upload_date': '20140818',
 629                 'license': 'Standard YouTube License',
 630                 'creator': 'Taylor Swift',
 631             },
 632             'params': {
 633                 'youtube_include_dash_manifest': True,
 634                 'format': '141/bestaudio[ext=m4a]',
 635             },
 636         },
 637         # Controversy video
 638         {
 639             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 640             'info_dict': {
 641                 'id': 'T4XJQO3qol8',
 642                 'ext': 'mp4',
 643                 'duration': 219,
 644                 'upload_date': '20100909',
 645                 'uploader': 'TJ Kirk',
 646                 'uploader_id': 'TheAmazingAtheist',
 647                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
 648                 'license': 'Standard YouTube License',
 649                 'title': 'Burning Everyone\'s Koran',
 650                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 651             }
 652         },
 653         # Normal age-gate video (No vevo, embed allowed)
 654         {
 655             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
 656             'info_dict': {
 657                 'id': 'HtVdAasjOgU',
 658                 'ext': 'mp4',
 659                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 660                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 661                 'duration': 142,
 662                 'uploader': 'The Witcher',
 663                 'uploader_id': 'WitcherGame',
 664                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
 665                 'upload_date': '20140605',
 666                 'license': 'Standard YouTube License',
 667                 'age_limit': 18,
 668             },
 669         },
 670         # Age-gate video with encrypted signature
 671         {
 672             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
 673             'info_dict': {
 674                 'id': '6kLq3WMV1nU',
 675                 'ext': 'webm',
 676                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 677                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 678                 'duration': 246,
 679                 'uploader': 'LloydVEVO',
 680                 'uploader_id': 'LloydVEVO',
 681                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
 682                 'upload_date': '20110629',
 683                 'license': 'Standard YouTube License',
 684                 'age_limit': 18,
 685             },
 686         },
 687         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 688         # YouTube Red ad is not captured for creator
 689         {
 690             'url': '__2ABJjxzNo',
 691             'info_dict': {
 692                 'id': '__2ABJjxzNo',
 693                 'ext': 'mp4',
 694                 'duration': 266,
 695                 'upload_date': '20100430',
 696                 'uploader_id': 'deadmau5',
 697                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
 698                 'creator': 'deadmau5',
 699                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 700                 'uploader': 'deadmau5',
 701                 'license': 'Standard YouTube License',
 702                 'title': 'Deadmau5 - Some Chords (HD)',
 703                 'alt_title': 'Some Chords',
 704             },
 705             'expected_warnings': [
 706                 'DASH manifest missing',
 707             ]
 708         },
 709         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 710         {
 711             'url': 'lqQg6PlCWgI',
 712             'info_dict': {
 713                 'id': 'lqQg6PlCWgI',
 714                 'ext': 'mp4',
 715                 'duration': 6085,
 716                 'upload_date': '20150827',
 717                 'uploader_id': 'olympic',
 718                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
 719                 'license': 'Standard YouTube License',
 720                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 721                 'uploader': 'Olympic',
 722                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 723             },
 724             'params': {
 725                 'skip_download': 'requires avconv',
 726             }
 727         },
 728         # Non-square pixels
 729         {
 730             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 731             'info_dict': {
 732                 'id': '_b-2C3KPAM0',
 733                 'ext': 'mp4',
 734                 'stretched_ratio': 16 / 9.,
 735                 'duration': 85,
 736                 'upload_date': '20110310',
 737                 'uploader_id': 'AllenMeow',
 738                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
 739                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 740                 'uploader': '孫ᄋᄅ',
 741                 'license': 'Standard YouTube License',
 742                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 743             },
 744         },
 745         # url_encoded_fmt_stream_map is empty string
 746         {
 747             'url': 'qEJwOuvDf7I',
 748             'info_dict': {
 749                 'id': 'qEJwOuvDf7I',
 750                 'ext': 'webm',
 751                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 752                 'description': '',
 753                 'upload_date': '20150404',
 754                 'uploader_id': 'spbelect',
 755                 'uploader': 'Наблюдатели Петербурга',
 756             },
 757             'params': {
 758                 'skip_download': 'requires avconv',
 759             },
 760             'skip': 'This live event has ended.',
 761         },
 762         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 763         {
 764             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 765             'info_dict': {
 766                 'id': 'FIl7x6_3R5Y',
 767                 'ext': 'webm',
 768                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 769                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 770                 'duration': 220,
 771                 'upload_date': '20150625',
 772                 'uploader_id': 'dorappi2000',
 773                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
 774                 'uploader': 'dorappi2000',
 775                 'license': 'Standard YouTube License',
 776                 'formats': 'mincount:31',
 777             },
 778             'skip': 'not actual anymore',
 779         },
 780         # DASH manifest with segment_list
 781         {
 782             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 783             'md5': '8ce563a1d667b599d21064e982ab9e31',
 784             'info_dict': {
 785                 'id': 'CsmdDsKjzN8',
 786                 'ext': 'mp4',
 787                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 788                 'uploader': 'Airtek',
 789                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 790                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 791                 'license': 'Standard YouTube License',
 792                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 793             },
 794             'params': {
 795                 'youtube_include_dash_manifest': True,
 796                 'format': '135',  # bestvideo
 797             },
 798             'skip': 'This live event has ended.',
 799         },
 800         {
 801             # Multifeed videos (multiple cameras), URL is for Main Camera
 802             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 803             'info_dict': {
 804                 'id': 'jqWvoWXjCVs',
 805                 'title': 'teamPGP: Rocket League Noob Stream',
 806                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 807             },
 808             'playlist': [{
 809                 'info_dict': {
 810                     'id': 'jqWvoWXjCVs',
 811                     'ext': 'mp4',
 812                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 813                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 814                     'duration': 7335,
 815                     'upload_date': '20150721',
 816                     'uploader': 'Beer Games Beer',
 817                     'uploader_id': 'beergamesbeer',
 818                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 819                     'license': 'Standard YouTube License',
 820                 },
 821             }, {
 822                 'info_dict': {
 823                     'id': '6h8e8xoXJzg',
 824                     'ext': 'mp4',
 825                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 826                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 827                     'duration': 7337,
 828                     'upload_date': '20150721',
 829                     'uploader': 'Beer Games Beer',
 830                     'uploader_id': 'beergamesbeer',
 831                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 832                     'license': 'Standard YouTube License',
 833                 },
 834             }, {
 835                 'info_dict': {
 836                     'id': 'PUOgX5z9xZw',
 837                     'ext': 'mp4',
 838                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 839                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 840                     'duration': 7337,
 841                     'upload_date': '20150721',
 842                     'uploader': 'Beer Games Beer',
 843                     'uploader_id': 'beergamesbeer',
 844                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 845                     'license': 'Standard YouTube License',
 846                 },
 847             }, {
 848                 'info_dict': {
 849                     'id': 'teuwxikvS5k',
 850                     'ext': 'mp4',
 851                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 852                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 853                     'duration': 7334,
 854                     'upload_date': '20150721',
 855                     'uploader': 'Beer Games Beer',
 856                     'uploader_id': 'beergamesbeer',
 857                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
 858                     'license': 'Standard YouTube License',
 859                 },
 860             }],
 861             'params': {
 862                 'skip_download': True,
 863             },
 864         },
 865         {
 866             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
 867             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
 868             'info_dict': {
 869                 'id': 'gVfLd0zydlo',
 870                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
 871             },
 872             'playlist_count': 2,
 873             'skip': 'Not multifeed anymore',
 874         },
 875         {
 876             'url': 'https://vid.plus/FlRa-iH7PGw',
 877             'only_matching': True,
 878         },
 879         {
 880             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
 881             'only_matching': True,
 882         },
 883         {
 884             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 885             # Also tests cut-off URL expansion in video description (see
 886             # https://github.com/rg3/youtube-dl/issues/1892,
 887             # https://github.com/rg3/youtube-dl/issues/8164)
 888             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 889             'info_dict': {
 890                 'id': 'lsguqyKfVQg',
 891                 'ext': 'mp4',
 892                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 893                 'alt_title': 'Dark Walk - Position Music',
 894                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 895                 'duration': 133,
 896                 'upload_date': '20151119',
 897                 'uploader_id': 'IronSoulElf',
 898                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
 899                 'uploader': 'IronSoulElf',
 900                 'license': 'Standard YouTube License',
 901                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 902                 'track': 'Dark Walk - Position Music',
 903                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
 904             },
 905             'params': {
 906                 'skip_download': True,
 907             },
 908         },
 909         {
 910             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 911             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 912             'only_matching': True,
 913         },
 914         {
 915             # Video with yt:stretch=17:0
 916             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 917             'info_dict': {
 918                 'id': 'Q39EVAstoRM',
 919                 'ext': 'mp4',
 920                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 921                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 922                 'upload_date': '20151107',
 923                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 924                 'uploader': 'CH GAMER DROID',
 925             },
 926             'params': {
 927                 'skip_download': True,
 928             },
 929             'skip': 'This video does not exist.',
 930         },
 931         {
 932             # Video licensed under Creative Commons
 933             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
 934             'info_dict': {
 935                 'id': 'M4gD1WSo5mA',
 936                 'ext': 'mp4',
 937                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
 938                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
 939                 'duration': 721,
 940                 'upload_date': '20150127',
 941                 'uploader_id': 'BerkmanCenter',
 942                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
 943                 'uploader': 'The Berkman Klein Center for Internet & Society',
 944                 'license': 'Creative Commons Attribution license (reuse allowed)',
 945             },
 946             'params': {
 947                 'skip_download': True,
 948             },
 949         },
 950         {
 951             # Channel-like uploader_url
 952             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
 953             'info_dict': {
 954                 'id': 'eQcmzGIKrzg',
 955                 'ext': 'mp4',
 956                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
 957                 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
 958                 'duration': 4060,
 959                 'upload_date': '20151119',
 960                 'uploader': 'Bernie Sanders',
 961                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
 962                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
 963                 'license': 'Creative Commons Attribution license (reuse allowed)',
 964             },
 965             'params': {
 966                 'skip_download': True,
 967             },
 968         },
 969         {
 970             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 971             'only_matching': True,
 972         },
 973         {
 974             # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
 975             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
 976             'only_matching': True,
 977         },
 978         {
 979             # Rental video preview
 980             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
 981             'info_dict': {
 982                 'id': 'uGpuVWrhIzE',
 983                 'ext': 'mp4',
 984                 'title': 'Piku - Trailer',
 985                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
 986                 'upload_date': '20150811',
 987                 'uploader': 'FlixMatrix',
 988                 'uploader_id': 'FlixMatrixKaravan',
 989                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
 990                 'license': 'Standard YouTube License',
 991             },
 992             'params': {
 993                 'skip_download': True,
 994             },
 995             'skip': 'This video is not available.',
 996         },
 997         {
 998             # YouTube Red video with episode data
 999             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1000             'info_dict': {
1001                 'id': 'iqKdEhx-dD4',
1002                 'ext': 'mp4',
1003                 'title': 'Isolation - Mind Field (Ep 1)',
1004                 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1005                 'duration': 2085,
1006                 'upload_date': '20170118',
1007                 'uploader': 'Vsauce',
1008                 'uploader_id': 'Vsauce',
1009                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1010                 'license': 'Standard YouTube License',
1011                 'series': 'Mind Field',
1012                 'season_number': 1,
1013                 'episode_number': 1,
1014             },
1015             'params': {
1016                 'skip_download': True,
1017             },
1018             'expected_warnings': [
1019                 'Skipping DASH manifest',
1020             ],
1021         },
1022         {
1023             # The following content has been identified by the YouTube community
1024             # as inappropriate or offensive to some audiences.
1025             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1026             'info_dict': {
1027                 'id': '6SJNVb0GnPI',
1028                 'ext': 'mp4',
1029                 'title': 'Race Differences in Intelligence',
1030                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1031                 'duration': 965,
1032                 'upload_date': '20140124',
1033                 'uploader': 'New Century Foundation',
1034                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1035                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1036                 'license': 'Standard YouTube License',
1037             },
1038             'params': {
1039                 'skip_download': True,
1040             },
1041         },
1042         {
1043             # itag 212
1044             'url': '1t24XAntNCY',
1045             'only_matching': True,
1046         },
1047         {
1048             # geo restricted to JP
1049             'url': 'sJL6WA-aGkQ',
1050             'only_matching': True,
1051         },
1052         {
1053             'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1054             'only_matching': True,
1055         },
1056     ]
1057
1058     def __init__(self, *args, **kwargs):
1059         super(YoutubeIE, self).__init__(*args, **kwargs)
1060         self._player_cache = {}
1061
1062     def report_video_info_webpage_download(self, video_id):
1063         """Report attempt to download video info webpage."""
1064         self.to_screen('%s: Downloading video info webpage' % video_id)
1065
1066     def report_information_extraction(self, video_id):
1067         """Report attempt to extract video information."""
1068         self.to_screen('%s: Extracting video information' % video_id)
1069
1070     def report_unavailable_format(self, video_id, format):
1071         """Report extracted video URL."""
1072         self.to_screen('%s: Format %s not available' % (video_id, format))
1073
1074     def report_rtmp_download(self):
1075         """Indicate the download will use the RTMP protocol."""
1076         self.to_screen('RTMP download detected')
1077
1078     def _signature_cache_id(self, example_sig):
1079         """ Return a string representation of a signature """
1080         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1081
1082     def _extract_signature_function(self, video_id, player_url, example_sig):
1083         id_m = re.match(
1084             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1085             player_url)
1086         if not id_m:
1087             raise ExtractorError('Cannot identify player %r' % player_url)
1088         player_type = id_m.group('ext')
1089         player_id = id_m.group('id')
1090
1091         # Read from filesystem cache
1092         func_id = '%s_%s_%s' % (
1093             player_type, player_id, self._signature_cache_id(example_sig))
1094         assert os.path.basename(func_id) == func_id
1095
1096         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1097         if cache_spec is not None:
1098             return lambda s: ''.join(s[i] for i in cache_spec)
1099
1100         download_note = (
1101             'Downloading player %s' % player_url
1102             if self._downloader.params.get('verbose') else
1103             'Downloading %s player %s' % (player_type, player_id)
1104         )
1105         if player_type == 'js':
1106             code = self._download_webpage(
1107                 player_url, video_id,
1108                 note=download_note,
1109                 errnote='Download of %s failed' % player_url)
1110             res = self._parse_sig_js(code)
1111         elif player_type == 'swf':
1112             urlh = self._request_webpage(
1113                 player_url, video_id,
1114                 note=download_note,
1115                 errnote='Download of %s failed' % player_url)
1116             code = urlh.read()
1117             res = self._parse_sig_swf(code)
1118         else:
1119             assert False, 'Invalid player type %r' % player_type
1120
1121         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1122         cache_res = res(test_string)
1123         cache_spec = [ord(c) for c in cache_res]
1124
1125         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1126         return res
1127
1128     def _print_sig_code(self, func, example_sig):
1129         def gen_sig_code(idxs):
1130             def _genslice(start, end, step):
1131                 starts = '' if start == 0 else str(start)
1132                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1133                 steps = '' if step == 1 else (':%d' % step)
1134                 return 's[%s%s%s]' % (starts, ends, steps)
1135
1136             step = None
1137             # Quelch pyflakes warnings - start will be set when step is set
1138             start = '(Never used)'
1139             for i, prev in zip(idxs[1:], idxs[:-1]):
1140                 if step is not None:
1141                     if i - prev == step:
1142                         continue
1143                     yield _genslice(start, prev, step)
1144                     step = None
1145                     continue
1146                 if i - prev in [-1, 1]:
1147                     step = i - prev
1148                     start = prev
1149                     continue
1150                 else:
1151                     yield 's[%d]' % prev
1152             if step is None:
1153                 yield 's[%d]' % i
1154             else:
1155                 yield _genslice(start, i, step)
1156
1157         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1158         cache_res = func(test_string)
1159         cache_spec = [ord(c) for c in cache_res]
1160         expr_code = ' + '.join(gen_sig_code(cache_spec))
1161         signature_id_tuple = '(%s)' % (
1162             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1163         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1164                 '    return %s\n') % (signature_id_tuple, expr_code)
1165         self.to_screen('Extracted signature function:\n' + code)
1166
1167     def _parse_sig_js(self, jscode):
1168         funcname = self._search_regex(
1169             (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1170              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
1171             jscode, 'Initial JS player signature function name', group='sig')
1172
1173         jsi = JSInterpreter(jscode)
1174         initial_function = jsi.extract_function(funcname)
1175         return lambda s: initial_function([s])
1176
1177     def _parse_sig_swf(self, file_contents):
1178         swfi = SWFInterpreter(file_contents)
1179         TARGET_CLASSNAME = 'SignatureDecipher'
1180         searched_class = swfi.extract_class(TARGET_CLASSNAME)
1181         initial_function = swfi.extract_function(searched_class, 'decipher')
1182         return lambda s: initial_function([s])
1183
1184     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1185         """Turn the encrypted s field into a working signature"""
1186
1187         if player_url is None:
1188             raise ExtractorError('Cannot decrypt signature without player_url')
1189
1190         if player_url.startswith('//'):
1191             player_url = 'https:' + player_url
1192         elif not re.match(r'https?://', player_url):
1193             player_url = compat_urlparse.urljoin(
1194                 'https://www.youtube.com', player_url)
1195         try:
1196             player_id = (player_url, self._signature_cache_id(s))
1197             if player_id not in self._player_cache:
1198                 func = self._extract_signature_function(
1199                     video_id, player_url, s
1200                 )
1201                 self._player_cache[player_id] = func
1202             func = self._player_cache[player_id]
1203             if self._downloader.params.get('youtube_print_sig_code'):
1204                 self._print_sig_code(func, s)
1205             return func(s)
1206         except Exception as e:
1207             tb = traceback.format_exc()
1208             raise ExtractorError(
1209                 'Signature extraction failed: ' + tb, cause=e)
1210
1211     def _get_subtitles(self, video_id, webpage):
1212         try:
1213             subs_doc = self._download_xml(
1214                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1215                 video_id, note=False)
1216         except ExtractorError as err:
1217             self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1218             return {}
1219
1220         sub_lang_list = {}
1221         for track in subs_doc.findall('track'):
1222             lang = track.attrib['lang_code']
1223             if lang in sub_lang_list:
1224                 continue
1225             sub_formats = []
1226             for ext in self._SUBTITLE_FORMATS:
1227                 params = compat_urllib_parse_urlencode({
1228                     'lang': lang,
1229                     'v': video_id,
1230                     'fmt': ext,
1231                     'name': track.attrib['name'].encode('utf-8'),
1232                 })
1233                 sub_formats.append({
1234                     'url': 'https://www.youtube.com/api/timedtext?' + params,
1235                     'ext': ext,
1236                 })
1237             sub_lang_list[lang] = sub_formats
1238         if not sub_lang_list:
1239             self._downloader.report_warning('video doesn\'t have subtitles')
1240             return {}
1241         return sub_lang_list
1242
1243     def _get_ytplayer_config(self, video_id, webpage):
1244         patterns = (
1245             # User data may contain arbitrary character sequences that may affect
1246             # JSON extraction with regex, e.g. when '};' is contained the second
1247             # regex won't capture the whole JSON. Yet working around by trying more
1248             # concrete regex first keeping in mind proper quoted string handling
1249             # to be implemented in future that will replace this workaround (see
1250             # https://github.com/rg3/youtube-dl/issues/7468,
1251             # https://github.com/rg3/youtube-dl/pull/7599)
1252             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1253             r';ytplayer\.config\s*=\s*({.+?});',
1254         )
1255         config = self._search_regex(
1256             patterns, webpage, 'ytplayer.config', default=None)
1257         if config:
1258             return self._parse_json(
1259                 uppercase_escape(config), video_id, fatal=False)
1260
1261     def _get_automatic_captions(self, video_id, webpage):
1262         """We need the webpage for getting the captions url, pass it as an
1263            argument to speed up the process."""
1264         self.to_screen('%s: Looking for automatic captions' % video_id)
1265         player_config = self._get_ytplayer_config(video_id, webpage)
1266         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1267         if not player_config:
1268             self._downloader.report_warning(err_msg)
1269             return {}
1270         try:
1271             args = player_config['args']
1272             caption_url = args.get('ttsurl')
1273             if caption_url:
1274                 timestamp = args['timestamp']
1275                 # We get the available subtitles
1276                 list_params = compat_urllib_parse_urlencode({
1277                     'type': 'list',
1278                     'tlangs': 1,
1279                     'asrs': 1,
1280                 })
1281                 list_url = caption_url + '&' + list_params
1282                 caption_list = self._download_xml(list_url, video_id)
1283                 original_lang_node = caption_list.find('track')
1284                 if original_lang_node is None:
1285                     self._downloader.report_warning('Video doesn\'t have automatic captions')
1286                     return {}
1287                 original_lang = original_lang_node.attrib['lang_code']
1288                 caption_kind = original_lang_node.attrib.get('kind', '')
1289
1290                 sub_lang_list = {}
1291                 for lang_node in caption_list.findall('target'):
1292                     sub_lang = lang_node.attrib['lang_code']
1293                     sub_formats = []
1294                     for ext in self._SUBTITLE_FORMATS:
1295                         params = compat_urllib_parse_urlencode({
1296                             'lang': original_lang,
1297                             'tlang': sub_lang,
1298                             'fmt': ext,
1299                             'ts': timestamp,
1300                             'kind': caption_kind,
1301                         })
1302                         sub_formats.append({
1303                             'url': caption_url + '&' + params,
1304                             'ext': ext,
1305                         })
1306                     sub_lang_list[sub_lang] = sub_formats
1307                 return sub_lang_list
1308
1309             def make_captions(sub_url, sub_langs):
1310                 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1311                 caption_qs = compat_parse_qs(parsed_sub_url.query)
1312                 captions = {}
1313                 for sub_lang in sub_langs:
1314                     sub_formats = []
1315                     for ext in self._SUBTITLE_FORMATS:
1316                         caption_qs.update({
1317                             'tlang': [sub_lang],
1318                             'fmt': [ext],
1319                         })
1320                         sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1321                             query=compat_urllib_parse_urlencode(caption_qs, True)))
1322                         sub_formats.append({
1323                             'url': sub_url,
1324                             'ext': ext,
1325                         })
1326                     captions[sub_lang] = sub_formats
1327                 return captions
1328
1329             # New captions format as of 22.06.2017
1330             player_response = args.get('player_response')
1331             if player_response and isinstance(player_response, compat_str):
1332                 player_response = self._parse_json(
1333                     player_response, video_id, fatal=False)
1334                 if player_response:
1335                     renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1336                     base_url = renderer['captionTracks'][0]['baseUrl']
1337                     sub_lang_list = []
1338                     for lang in renderer['translationLanguages']:
1339                         lang_code = lang.get('languageCode')
1340                         if lang_code:
1341                             sub_lang_list.append(lang_code)
1342                     return make_captions(base_url, sub_lang_list)
1343
1344             # Some videos don't provide ttsurl but rather caption_tracks and
1345             # caption_translation_languages (e.g. 20LmZk1hakA)
1346             # Does not used anymore as of 22.06.2017
1347             caption_tracks = args['caption_tracks']
1348             caption_translation_languages = args['caption_translation_languages']
1349             caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1350             sub_lang_list = []
1351             for lang in caption_translation_languages.split(','):
1352                 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1353                 sub_lang = lang_qs.get('lc', [None])[0]
1354                 if sub_lang:
1355                     sub_lang_list.append(sub_lang)
1356             return make_captions(caption_url, sub_lang_list)
1357         # An extractor error can be raise by the download process if there are
1358         # no automatic captions but there are subtitles
1359         except (KeyError, IndexError, ExtractorError):
1360             self._downloader.report_warning(err_msg)
1361             return {}
1362
1363     def _mark_watched(self, video_id, video_info):
1364         playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1365         if not playback_url:
1366             return
1367         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1368         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1369
1370         # cpn generation algorithm is reverse engineered from base.js.
1371         # In fact it works even with dummy cpn.
1372         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1373         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1374
1375         qs.update({
1376             'ver': ['2'],
1377             'cpn': [cpn],
1378         })
1379         playback_url = compat_urlparse.urlunparse(
1380             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1381
1382         self._download_webpage(
1383             playback_url, video_id, 'Marking watched',
1384             'Unable to mark watched', fatal=False)
1385
1386     @staticmethod
1387     def _extract_urls(webpage):
1388         # Embedded YouTube player
1389         entries = [
1390             unescapeHTML(mobj.group('url'))
1391             for mobj in re.finditer(r'''(?x)
1392             (?:
1393                 <iframe[^>]+?src=|
1394                 data-video-url=|
1395                 <embed[^>]+?src=|
1396                 embedSWF\(?:\s*|
1397                 <object[^>]+data=|
1398                 new\s+SWFObject\(
1399             )
1400             (["\'])
1401                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1402                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1403             \1''', webpage)]
1404
1405         # lazyYT YouTube embed
1406         entries.extend(list(map(
1407             unescapeHTML,
1408             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1409
1410         # Wordpress "YouTube Video Importer" plugin
1411         matches = re.findall(r'''(?x)<div[^>]+
1412             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1413             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1414         entries.extend(m[-1] for m in matches)
1415
1416         return entries
1417
1418     @staticmethod
1419     def _extract_url(webpage):
1420         urls = YoutubeIE._extract_urls(webpage)
1421         return urls[0] if urls else None
1422
1423     @classmethod
1424     def extract_id(cls, url):
1425         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1426         if mobj is None:
1427             raise ExtractorError('Invalid URL: %s' % url)
1428         video_id = mobj.group(2)
1429         return video_id
1430
1431     def _extract_annotations(self, video_id):
1432         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1433         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1434
1435     @staticmethod
1436     def _extract_chapters(description, duration):
1437         if not description:
1438             return None
1439         chapter_lines = re.findall(
1440             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1441             description)
1442         if not chapter_lines:
1443             return None
1444         chapters = []
1445         for next_num, (chapter_line, time_point) in enumerate(
1446                 chapter_lines, start=1):
1447             start_time = parse_duration(time_point)
1448             if start_time is None:
1449                 continue
1450             if start_time > duration:
1451                 break
1452             end_time = (duration if next_num == len(chapter_lines)
1453                         else parse_duration(chapter_lines[next_num][1]))
1454             if end_time is None:
1455                 continue
1456             if end_time > duration:
1457                 end_time = duration
1458             if start_time > end_time:
1459                 break
1460             chapter_title = re.sub(
1461                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1462             chapter_title = re.sub(r'\s+', ' ', chapter_title)
1463             chapters.append({
1464                 'start_time': start_time,
1465                 'end_time': end_time,
1466                 'title': chapter_title,
1467             })
1468         return chapters
1469
1470     def _real_extract(self, url):
1471         url, smuggled_data = unsmuggle_url(url, {})
1472
1473         proto = (
1474             'http' if self._downloader.params.get('prefer_insecure', False)
1475             else 'https')
1476
1477         start_time = None
1478         end_time = None
1479         parsed_url = compat_urllib_parse_urlparse(url)
1480         for component in [parsed_url.fragment, parsed_url.query]:
1481             query = compat_parse_qs(component)
1482             if start_time is None and 't' in query:
1483                 start_time = parse_duration(query['t'][0])
1484             if start_time is None and 'start' in query:
1485                 start_time = parse_duration(query['start'][0])
1486             if end_time is None and 'end' in query:
1487                 end_time = parse_duration(query['end'][0])
1488
1489         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1490         mobj = re.search(self._NEXT_URL_RE, url)
1491         if mobj:
1492             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1493         video_id = self.extract_id(url)
1494
1495         # Get video webpage
1496         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1497         video_webpage = self._download_webpage(url, video_id)
1498
1499         # Attempt to extract SWF player URL
1500         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1501         if mobj is not None:
1502             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1503         else:
1504             player_url = None
1505
1506         dash_mpds = []
1507
1508         def add_dash_mpd(video_info):
1509             dash_mpd = video_info.get('dashmpd')
1510             if dash_mpd and dash_mpd[0] not in dash_mpds:
1511                 dash_mpds.append(dash_mpd[0])
1512
1513         is_live = None
1514         view_count = None
1515
1516         def extract_view_count(v_info):
1517             return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1518
1519         # Get video info
1520         embed_webpage = None
1521         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1522             age_gate = True
1523             # We simulate the access to the video from www.youtube.com/v/{video_id}
1524             # this can be viewed without login into Youtube
1525             url = proto + '://www.youtube.com/embed/%s' % video_id
1526             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1527             data = compat_urllib_parse_urlencode({
1528                 'video_id': video_id,
1529                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1530                 'sts': self._search_regex(
1531                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1532             })
1533             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1534             video_info_webpage = self._download_webpage(
1535                 video_info_url, video_id,
1536                 note='Refetching age-gated info webpage',
1537                 errnote='unable to download video info webpage')
1538             video_info = compat_parse_qs(video_info_webpage)
1539             add_dash_mpd(video_info)
1540         else:
1541             age_gate = False
1542             video_info = None
1543             sts = None
1544             # Try looking directly into the video webpage
1545             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1546             if ytplayer_config:
1547                 args = ytplayer_config['args']
1548                 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1549                     # Convert to the same format returned by compat_parse_qs
1550                     video_info = dict((k, [v]) for k, v in args.items())
1551                     add_dash_mpd(video_info)
1552                 # Rental video is not rented but preview is available (e.g.
1553                 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1554                 # https://github.com/rg3/youtube-dl/issues/10532)
1555                 if not video_info and args.get('ypc_vid'):
1556                     return self.url_result(
1557                         args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1558                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1559                     is_live = True
1560                 sts = ytplayer_config.get('sts')
1561             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1562                 # We also try looking in get_video_info since it may contain different dashmpd
1563                 # URL that points to a DASH manifest with possibly different itag set (some itags
1564                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1565                 # manifest pointed by get_video_info's dashmpd).
1566                 # The general idea is to take a union of itags of both DASH manifests (for example
1567                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1568                 self.report_video_info_webpage_download(video_id)
1569                 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1570                     query = {
1571                         'video_id': video_id,
1572                         'ps': 'default',
1573                         'eurl': '',
1574                         'gl': 'US',
1575                         'hl': 'en',
1576                     }
1577                     if el:
1578                         query['el'] = el
1579                     if sts:
1580                         query['sts'] = sts
1581                     video_info_webpage = self._download_webpage(
1582                         '%s://www.youtube.com/get_video_info' % proto,
1583                         video_id, note=False,
1584                         errnote='unable to download video info webpage',
1585                         fatal=False, query=query)
1586                     if not video_info_webpage:
1587                         continue
1588                     get_video_info = compat_parse_qs(video_info_webpage)
1589                     add_dash_mpd(get_video_info)
1590                     if view_count is None:
1591                         view_count = extract_view_count(get_video_info)
1592                     if not video_info:
1593                         video_info = get_video_info
1594                     if 'token' in get_video_info:
1595                         # Different get_video_info requests may report different results, e.g.
1596                         # some may report video unavailability, but some may serve it without
1597                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1598                         # the original webpage as well as el=info and el=embedded get_video_info
1599                         # requests report video unavailability due to geo restriction while
1600                         # el=detailpage succeeds and returns valid data). This is probably
1601                         # due to YouTube measures against IP ranges of hosting providers.
1602                         # Working around by preferring the first succeeded video_info containing
1603                         # the token if no such video_info yet was found.
1604                         if 'token' not in video_info:
1605                             video_info = get_video_info
1606                         break
1607
1608         def extract_unavailable_message():
1609             return self._html_search_regex(
1610                 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1611                 video_webpage, 'unavailable message', default=None)
1612
1613         if 'token' not in video_info:
1614             if 'reason' in video_info:
1615                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1616                     regions_allowed = self._html_search_meta(
1617                         'regionsAllowed', video_webpage, default=None)
1618                     countries = regions_allowed.split(',') if regions_allowed else None
1619                     self.raise_geo_restricted(
1620                         msg=video_info['reason'][0], countries=countries)
1621                 reason = video_info['reason'][0]
1622                 if 'Invalid parameters' in reason:
1623                     unavailable_message = extract_unavailable_message()
1624                     if unavailable_message:
1625                         reason = unavailable_message
1626                 raise ExtractorError(
1627                     'YouTube said: %s' % reason,
1628                     expected=True, video_id=video_id)
1629             else:
1630                 raise ExtractorError(
1631                     '"token" parameter not in video info for unknown reason',
1632                     video_id=video_id)
1633
1634         # title
1635         if 'title' in video_info:
1636             video_title = video_info['title'][0]
1637         else:
1638             self._downloader.report_warning('Unable to extract video title')
1639             video_title = '_'
1640
1641         # description
1642         description_original = video_description = get_element_by_id("eow-description", video_webpage)
1643         if video_description:
1644
1645             def replace_url(m):
1646                 redir_url = compat_urlparse.urljoin(url, m.group(1))
1647                 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1648                 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1649                     qs = compat_parse_qs(parsed_redir_url.query)
1650                     q = qs.get('q')
1651                     if q and q[0]:
1652                         return q[0]
1653                 return redir_url
1654
1655             description_original = video_description = re.sub(r'''(?x)
1656                 <a\s+
1657                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1658                     (?:title|href)="([^"]+)"\s+
1659                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
1660                     class="[^"]*"[^>]*>
1661                 [^<]+\.{3}\s*
1662                 </a>
1663             ''', replace_url, video_description)
1664             video_description = clean_html(video_description)
1665         else:
1666             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1667             if fd_mobj:
1668                 video_description = unescapeHTML(fd_mobj.group(1))
1669             else:
1670                 video_description = ''
1671
1672         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1673             if not self._downloader.params.get('noplaylist'):
1674                 entries = []
1675                 feed_ids = []
1676                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1677                 for feed in multifeed_metadata_list.split(','):
1678                     # Unquote should take place before split on comma (,) since textual
1679                     # fields may contain comma as well (see
1680                     # https://github.com/rg3/youtube-dl/issues/8536)
1681                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1682                     entries.append({
1683                         '_type': 'url_transparent',
1684                         'ie_key': 'Youtube',
1685                         'url': smuggle_url(
1686                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1687                             {'force_singlefeed': True}),
1688                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1689                     })
1690                     feed_ids.append(feed_data['id'][0])
1691                 self.to_screen(
1692                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1693                     % (', '.join(feed_ids), video_id))
1694                 return self.playlist_result(entries, video_id, video_title, video_description)
1695             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1696
1697         if view_count is None:
1698             view_count = extract_view_count(video_info)
1699
1700         # Check for "rental" videos
1701         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1702             raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1703
1704         def _extract_filesize(media_url):
1705             return int_or_none(self._search_regex(
1706                 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1707
1708         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1709             self.report_rtmp_download()
1710             formats = [{
1711                 'format_id': '_rtmp',
1712                 'protocol': 'rtmp',
1713                 'url': video_info['conn'][0],
1714                 'player_url': player_url,
1715             }]
1716         elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1717             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1718             if 'rtmpe%3Dyes' in encoded_url_map:
1719                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1720             formats_spec = {}
1721             fmt_list = video_info.get('fmt_list', [''])[0]
1722             if fmt_list:
1723                 for fmt in fmt_list.split(','):
1724                     spec = fmt.split('/')
1725                     if len(spec) > 1:
1726                         width_height = spec[1].split('x')
1727                         if len(width_height) == 2:
1728                             formats_spec[spec[0]] = {
1729                                 'resolution': spec[1],
1730                                 'width': int_or_none(width_height[0]),
1731                                 'height': int_or_none(width_height[1]),
1732                             }
1733             q = qualities(['small', 'medium', 'hd720'])
1734             formats = []
1735             for url_data_str in encoded_url_map.split(','):
1736                 url_data = compat_parse_qs(url_data_str)
1737                 if 'itag' not in url_data or 'url' not in url_data:
1738                     continue
1739                 format_id = url_data['itag'][0]
1740                 url = url_data['url'][0]
1741
1742                 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1743                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1744                     jsplayer_url_json = self._search_regex(
1745                         ASSETS_RE,
1746                         embed_webpage if age_gate else video_webpage,
1747                         'JS player URL (1)', default=None)
1748                     if not jsplayer_url_json and not age_gate:
1749                         # We need the embed website after all
1750                         if embed_webpage is None:
1751                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1752                             embed_webpage = self._download_webpage(
1753                                 embed_url, video_id, 'Downloading embed webpage')
1754                         jsplayer_url_json = self._search_regex(
1755                             ASSETS_RE, embed_webpage, 'JS player URL')
1756
1757                     player_url = json.loads(jsplayer_url_json)
1758                     if player_url is None:
1759                         player_url_json = self._search_regex(
1760                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1761                             video_webpage, 'age gate player URL')
1762                         player_url = json.loads(player_url_json)
1763
1764                 if 'sig' in url_data:
1765                     url += '&signature=' + url_data['sig'][0]
1766                 elif 's' in url_data:
1767                     encrypted_sig = url_data['s'][0]
1768
1769                     if self._downloader.params.get('verbose'):
1770                         if player_url is None:
1771                             player_version = 'unknown'
1772                             player_desc = 'unknown'
1773                         else:
1774                             if player_url.endswith('swf'):
1775                                 player_version = self._search_regex(
1776                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1777                                     'flash player', fatal=False)
1778                                 player_desc = 'flash player %s' % player_version
1779                             else:
1780                                 player_version = self._search_regex(
1781                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1782                                      r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1783                                     player_url,
1784                                     'html5 player', fatal=False)
1785                                 player_desc = 'html5 player %s' % player_version
1786
1787                         parts_sizes = self._signature_cache_id(encrypted_sig)
1788                         self.to_screen('{%s} signature length %s, %s' %
1789                                        (format_id, parts_sizes, player_desc))
1790
1791                     signature = self._decrypt_signature(
1792                         encrypted_sig, video_id, player_url, age_gate)
1793                     url += '&signature=' + signature
1794                 if 'ratebypass' not in url:
1795                     url += '&ratebypass=yes'
1796
1797                 dct = {
1798                     'format_id': format_id,
1799                     'url': url,
1800                     'player_url': player_url,
1801                 }
1802                 if format_id in self._formats:
1803                     dct.update(self._formats[format_id])
1804                 if format_id in formats_spec:
1805                     dct.update(formats_spec[format_id])
1806
1807                 # Some itags are not included in DASH manifest thus corresponding formats will
1808                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1809                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1810                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1811                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1812
1813                 filesize = int_or_none(url_data.get(
1814                     'clen', [None])[0]) or _extract_filesize(url)
1815
1816                 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1817
1818                 more_fields = {
1819                     'filesize': filesize,
1820                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1821                     'width': width,
1822                     'height': height,
1823                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1824                     'format_note': quality,
1825                     'quality': q(quality),
1826                 }
1827                 for key, value in more_fields.items():
1828                     if value:
1829                         dct[key] = value
1830                 type_ = url_data.get('type', [None])[0]
1831                 if type_:
1832                     type_split = type_.split(';')
1833                     kind_ext = type_split[0].split('/')
1834                     if len(kind_ext) == 2:
1835                         kind, _ = kind_ext
1836                         dct['ext'] = mimetype2ext(type_split[0])
1837                         if kind in ('audio', 'video'):
1838                             codecs = None
1839                             for mobj in re.finditer(
1840                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1841                                 if mobj.group('key') == 'codecs':
1842                                     codecs = mobj.group('val')
1843                                     break
1844                             if codecs:
1845                                 dct.update(parse_codecs(codecs))
1846                 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1847                     dct['downloader_options'] = {
1848                         # Youtube throttles chunks >~10M
1849                         'http_chunk_size': 10485760,
1850                     }
1851                 formats.append(dct)
1852         elif video_info.get('hlsvp'):
1853             manifest_url = video_info['hlsvp'][0]
1854             formats = []
1855             m3u8_formats = self._extract_m3u8_formats(
1856                 manifest_url, video_id, 'mp4', fatal=False)
1857             for a_format in m3u8_formats:
1858                 itag = self._search_regex(
1859                     r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1860                 if itag:
1861                     a_format['format_id'] = itag
1862                     if itag in self._formats:
1863                         dct = self._formats[itag].copy()
1864                         dct.update(a_format)
1865                         a_format = dct
1866                 a_format['player_url'] = player_url
1867                 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1868                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1869                 formats.append(a_format)
1870         else:
1871             error_message = clean_html(video_info.get('reason', [None])[0])
1872             if not error_message:
1873                 error_message = extract_unavailable_message()
1874             if error_message:
1875                 raise ExtractorError(error_message, expected=True)
1876             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1877
1878         # uploader
1879         video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1880         if video_uploader:
1881             video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1882         else:
1883             self._downloader.report_warning('unable to extract uploader name')
1884
1885         # uploader_id
1886         video_uploader_id = None
1887         video_uploader_url = None
1888         mobj = re.search(
1889             r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1890             video_webpage)
1891         if mobj is not None:
1892             video_uploader_id = mobj.group('uploader_id')
1893             video_uploader_url = mobj.group('uploader_url')
1894         else:
1895             self._downloader.report_warning('unable to extract uploader nickname')
1896
1897         # thumbnail image
1898         # We try first to get a high quality image:
1899         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1900                             video_webpage, re.DOTALL)
1901         if m_thumb is not None:
1902             video_thumbnail = m_thumb.group(1)
1903         elif 'thumbnail_url' not in video_info:
1904             self._downloader.report_warning('unable to extract video thumbnail')
1905             video_thumbnail = None
1906         else:   # don't panic if we can't find it
1907             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1908
1909         # upload date
1910         upload_date = self._html_search_meta(
1911             'datePublished', video_webpage, 'upload date', default=None)
1912         if not upload_date:
1913             upload_date = self._search_regex(
1914                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1915                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1916                 video_webpage, 'upload date', default=None)
1917         upload_date = unified_strdate(upload_date)
1918
1919         video_license = self._html_search_regex(
1920             r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1921             video_webpage, 'license', default=None)
1922
1923         m_music = re.search(
1924             r'''(?x)
1925                 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1926                 <ul[^>]*>\s*
1927                 <li>(?P<title>.+?)
1928                 by (?P<creator>.+?)
1929                 (?:
1930                     \(.+?\)|
1931                     <a[^>]*
1932                         (?:
1933                             \bhref=["\']/red[^>]*>|             # drop possible
1934                             >\s*Listen ad-free with YouTube Red # YouTube Red ad
1935                         )
1936                     .*?
1937                 )?</li
1938             ''',
1939             video_webpage)
1940         if m_music:
1941             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1942             video_creator = clean_html(m_music.group('creator'))
1943         else:
1944             video_alt_title = video_creator = None
1945
1946         def extract_meta(field):
1947             return self._html_search_regex(
1948                 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1949                 video_webpage, field, default=None)
1950
1951         track = extract_meta('Song')
1952         artist = extract_meta('Artist')
1953
1954         m_episode = re.search(
1955             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1956             video_webpage)
1957         if m_episode:
1958             series = m_episode.group('series')
1959             season_number = int(m_episode.group('season'))
1960             episode_number = int(m_episode.group('episode'))
1961         else:
1962             series = season_number = episode_number = None
1963
1964         m_cat_container = self._search_regex(
1965             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1966             video_webpage, 'categories', default=None)
1967         if m_cat_container:
1968             category = self._html_search_regex(
1969                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1970                 default=None)
1971             video_categories = None if category is None else [category]
1972         else:
1973             video_categories = None
1974
1975         video_tags = [
1976             unescapeHTML(m.group('content'))
1977             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1978
1979         def _extract_count(count_name):
1980             return str_to_int(self._search_regex(
1981                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1982                 % re.escape(count_name),
1983                 video_webpage, count_name, default=None))
1984
1985         like_count = _extract_count('like')
1986         dislike_count = _extract_count('dislike')
1987
1988         # subtitles
1989         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1990         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1991
1992         video_duration = try_get(
1993             video_info, lambda x: int_or_none(x['length_seconds'][0]))
1994         if not video_duration:
1995             video_duration = parse_duration(self._html_search_meta(
1996                 'duration', video_webpage, 'video duration'))
1997
1998         # annotations
1999         video_annotations = None
2000         if self._downloader.params.get('writeannotations', False):
2001             video_annotations = self._extract_annotations(video_id)
2002
2003         chapters = self._extract_chapters(description_original, video_duration)
2004
2005         # Look for the DASH manifest
2006         if self._downloader.params.get('youtube_include_dash_manifest', True):
2007             dash_mpd_fatal = True
2008             for mpd_url in dash_mpds:
2009                 dash_formats = {}
2010                 try:
2011                     def decrypt_sig(mobj):
2012                         s = mobj.group(1)
2013                         dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2014                         return '/signature/%s' % dec_s
2015
2016                     mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2017
2018                     for df in self._extract_mpd_formats(
2019                             mpd_url, video_id, fatal=dash_mpd_fatal,
2020                             formats_dict=self._formats):
2021                         if not df.get('filesize'):
2022                             df['filesize'] = _extract_filesize(df['url'])
2023                         # Do not overwrite DASH format found in some previous DASH manifest
2024                         if df['format_id'] not in dash_formats:
2025                             dash_formats[df['format_id']] = df
2026                         # Additional DASH manifests may end up in HTTP Error 403 therefore
2027                         # allow them to fail without bug report message if we already have
2028                         # some DASH manifest succeeded. This is temporary workaround to reduce
2029                         # burst of bug reports until we figure out the reason and whether it
2030                         # can be fixed at all.
2031                         dash_mpd_fatal = False
2032                 except (ExtractorError, KeyError) as e:
2033                     self.report_warning(
2034                         'Skipping DASH manifest: %r' % e, video_id)
2035                 if dash_formats:
2036                     # Remove the formats we found through non-DASH, they
2037                     # contain less info and it can be wrong, because we use
2038                     # fixed values (for example the resolution). See
2039                     # https://github.com/rg3/youtube-dl/issues/5774 for an
2040                     # example.
2041                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2042                     formats.extend(dash_formats.values())
2043
2044         # Check for malformed aspect ratio
2045         stretched_m = re.search(
2046             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2047             video_webpage)
2048         if stretched_m:
2049             w = float(stretched_m.group('w'))
2050             h = float(stretched_m.group('h'))
2051             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2052             # We will only process correct ratios.
2053             if w > 0 and h > 0:
2054                 ratio = w / h
2055                 for f in formats:
2056                     if f.get('vcodec') != 'none':
2057                         f['stretched_ratio'] = ratio
2058
2059         self._sort_formats(formats)
2060
2061         self.mark_watched(video_id, video_info)
2062
2063         return {
2064             'id': video_id,
2065             'uploader': video_uploader,
2066             'uploader_id': video_uploader_id,
2067             'uploader_url': video_uploader_url,
2068             'upload_date': upload_date,
2069             'license': video_license,
2070             'creator': video_creator or artist,
2071             'title': video_title,
2072             'alt_title': video_alt_title or track,
2073             'thumbnail': video_thumbnail,
2074             'description': video_description,
2075             'categories': video_categories,
2076             'tags': video_tags,
2077             'subtitles': video_subtitles,
2078             'automatic_captions': automatic_captions,
2079             'duration': video_duration,
2080             'age_limit': 18 if age_gate else 0,
2081             'annotations': video_annotations,
2082             'chapters': chapters,
2083             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2084             'view_count': view_count,
2085             'like_count': like_count,
2086             'dislike_count': dislike_count,
2087             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2088             'formats': formats,
2089             'is_live': is_live,
2090             'start_time': start_time,
2091             'end_time': end_time,
2092             'series': series,
2093             'season_number': season_number,
2094             'episode_number': episode_number,
2095             'track': track,
2096             'artist': artist,
2097         }
2098
2099
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for YouTube playlists, addressed either by a playlist-bearing
    # URL or by a bare playlist id given in place of a URL.
    IE_DESC = 'YouTube.com playlists'
    # Two alternatives: a URL form whose playlist id is captured as group 1,
    # or a bare id matching _PLAYLIST_ID_RE captured as group 2.
    # _real_extract reads the id as `group(1) or group(2)`.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    # Playlist page URL; _extract_playlist fills in the playlist id.
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Captures the 11-character video id, the playlist index and, optionally,
    # the link text of a /watch link.
    # NOTE(review): not referenced in the methods of this class; presumably
    # consumed by the base class' page-parsing helpers -- confirm.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    # Test fixtures: an input URL (or bare id) with the expected metadata
    # and/or entry counts.
    # NOTE(review): presumably consumed by the project's test harness -- confirm.
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 華語最新單曲 (2/24更新)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }]
2254
    def _real_initialize(self):
        """Initialization hook: delegates to `self._login()`."""
        self._login()
2257
2258     def _extract_mix(self, playlist_id):
2259         # The mixes are generated from a single video
2260         # the id of the playlist is just 'RD' + video_id
2261         ids = []
2262         last_id = playlist_id[-11:]
2263         for n in itertools.count(1):
2264             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2265             webpage = self._download_webpage(
2266                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2267             new_ids = orderedSet(re.findall(
2268                 r'''(?xs)data-video-username=".*?".*?
2269                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
2270                 webpage))
2271             # Fetch new pages until all the videos are repeated, it seems that
2272             # there are always 51 unique videos.
2273             new_ids = [_id for _id in new_ids if _id not in ids]
2274             if not new_ids:
2275                 break
2276             ids.extend(new_ids)
2277             last_id = ids[-1]
2278
2279         url_results = self._ids_to_results(ids)
2280
2281         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2282         title_span = (
2283             search_title('playlist-title') or
2284             search_title('title long-title') or
2285             search_title('title'))
2286         title = clean_html(title_span)
2287
2288         return self.playlist_result(url_results, playlist_id, title)
2289
2290     def _extract_playlist(self, playlist_id):
2291         url = self._TEMPLATE_URL % playlist_id
2292         page = self._download_webpage(url, playlist_id)
2293
2294         # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
2295         for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2296             match = match.strip()
2297             # Check if the playlist exists or is private
2298             mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2299             if mobj:
2300                 reason = mobj.group('reason')
2301                 message = 'This playlist %s' % reason
2302                 if 'private' in reason:
2303                     message += ', use --username or --netrc to access it'
2304                 message += '.'
2305                 raise ExtractorError(message, expected=True)
2306             elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2307                 raise ExtractorError(
2308                     'Invalid parameters. Maybe URL is incorrect.',
2309                     expected=True)
2310             elif re.match(r'[^<]*Choose your language[^<]*', match):
2311                 continue
2312             else:
2313                 self.report_warning('Youtube gives an alert message: ' + match)
2314
2315         playlist_title = self._html_search_regex(
2316             r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2317             page, 'title', default=None)
2318
2319         _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2320         uploader = self._search_regex(
2321             r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2322             page, 'uploader', default=None)
2323         mobj = re.search(
2324             r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2325             page)
2326         if mobj:
2327             uploader_id = mobj.group('uploader_id')
2328             uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2329         else:
2330             uploader_id = uploader_url = None
2331
2332         has_videos = True
2333
2334         if not playlist_title:
2335             try:
2336                 # Some playlist URLs don't actually serve a playlist (e.g.
2337                 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2338                 next(self._entries(page, playlist_id))
2339             except StopIteration:
2340                 has_videos = False
2341
2342         playlist = self.playlist_result(
2343             self._entries(page, playlist_id), playlist_id, playlist_title)
2344         playlist.update({
2345             'uploader': uploader,
2346             'uploader_id': uploader_id,
2347             'uploader_url': uploader_url,
2348         })
2349
2350         return has_videos, playlist
2351
2352     def _check_download_just_video(self, url, playlist_id):
2353         # Check if it's a video-specific URL
2354         query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2355         video_id = query_dict.get('v', [None])[0] or self._search_regex(
2356             r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2357             'video id', default=None)
2358         if video_id:
2359             if self._downloader.params.get('noplaylist'):
2360                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2361                 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2362             else:
2363                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2364                 return video_id, None
2365         return None, None
2366
2367     def _real_extract(self, url):
2368         # Extract playlist id
2369         mobj = re.match(self._VALID_URL, url)
2370         if mobj is None:
2371             raise ExtractorError('Invalid URL: %s' % url)
2372         playlist_id = mobj.group(1) or mobj.group(2)
2373
2374         video_id, video = self._check_download_just_video(url, playlist_id)
2375         if video:
2376             return video
2377
2378         if playlist_id.startswith(('RD', 'UL', 'PU')):
2379             # Mixes require a custom extraction process
2380             return self._extract_mix(playlist_id)
2381
2382         has_videos, playlist = self._extract_playlist(playlist_id)
2383         if has_videos or not video_id:
2384             return playlist
2385
2386         # Some playlist URLs don't actually serve a playlist (see
2387         # https://github.com/rg3/youtube-dl/issues/10537).
2388         # Fallback to plain video extraction if there is a video id
2389         # along with playlist id.
2390         return self.url_result(video_id, 'Youtube', video_id=video_id)
2391
2392
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for /channel/<id> URLs. _real_extract prefers handing off to
    # the channel's "UU..." playlist and otherwise lists the videos found on
    # the channel's /videos page.
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    # Channel video listing URL; _build_template_url fills in the channel id.
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    # Captures a /watch link's video id and, when a title="..." attribute
    # precedes the href, its title.
    # NOTE(review): not referenced in the methods of this class; presumably
    # consumed by the base class' page-parsing helpers -- confirm.
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    # Test fixtures; the expected ids use the "UU" prefix rather than the
    # "UC" prefix found in the input URLs.
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]
2417
2418     @classmethod
2419     def suitable(cls, url):
2420         return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2421                 else super(YoutubeChannelIE, cls).suitable(url))
2422
2423     def _build_template_url(self, url, channel_id):
2424         return self._TEMPLATE_URL % channel_id
2425
2426     def _real_extract(self, url):
2427         channel_id = self._match_id(url)
2428
2429         url = self._build_template_url(url, channel_id)
2430
2431         # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2432         # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2433         # otherwise fallback on channel by page extraction
2434         channel_page = self._download_webpage(
2435             url + '?view=57', channel_id,
2436             'Downloading channel page', fatal=False)
2437         if channel_page is False:
2438             channel_playlist_id = False
2439         else:
2440             channel_playlist_id = self._html_search_meta(
2441                 'channelId', channel_page, 'channel id', default=None)
2442             if not channel_playlist_id:
2443                 channel_url = self._html_search_meta(
2444                     ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2445                     channel_page, 'channel url', default=None)
2446                 if channel_url:
2447                     channel_playlist_id = self._search_regex(
2448                         r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2449                         channel_url, 'channel id', default=None)
2450         if channel_playlist_id and channel_playlist_id.startswith('UC'):
2451             playlist_id = 'UU' + channel_playlist_id[2:]
2452             return self.url_result(
2453                 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2454
2455         channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2456         autogenerated = re.search(r'''(?x)
2457                 class="[^"]*?(?:
2458                     channel-header-autogenerated-label|
2459                     yt-channel-title-autogenerated
2460                 )[^"]*"''', channel_page) is not None
2461
2462         if autogenerated:
2463             # The videos are contained in a single page
2464             # the ajax pages can't be used, they are empty
2465             entries = [
2466                 self.url_result(
2467                     video_id, 'Youtube', video_id=video_id,
2468                     video_title=video_title)
2469                 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2470             return self.playlist_result(entries, channel_id)
2471
2472         try:
2473             next(self._entries(channel_page, channel_id))
2474         except StopIteration:
2475             alert_message = self._html_search_regex(
2476                 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2477                 channel_page, 'alert', default=None, group='alert')
2478             if alert_message:
2479                 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2480
2481         return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2482
2483
class YoutubeUserIE(YoutubeChannelIE):
    # Extractor for user pages; reuses YoutubeChannelIE's extraction and only
    # changes how the listing URL is built (see _build_template_url).
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    # Accepts youtube.com/user/<id>, youtube.com/c/<id>, youtube.com/<id>
    # (except a few reserved first path segments) and the "ytuser:<id>"
    # keyword. The optional `user` group records which path prefix
    # ('user' or 'c') was present.
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    # Filled in by _build_template_url with (path prefix, id).
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    # Test fixtures for the URL forms accepted above.
    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]
2520
2521     @classmethod
2522     def suitable(cls, url):
2523         # Don't return True if the url can be extracted with other youtube
2524         # extractor, the regex would is too permissive and it would match.
2525         other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2526         if any(ie.suitable(url) for ie in other_yt_ies):
2527             return False
2528         else:
2529             return super(YoutubeUserIE, cls).suitable(url)
2530
2531     def _build_template_url(self, url, channel_id):
2532         mobj = re.match(self._VALID_URL, url)
2533         return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2534
2535
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to a video result, or to the base URL.

        The page is fetched non-fatally. If its og:type starts with 'video'
        and it carries a well-formed 11-character videoId meta value, the
        result points at that video via YoutubeIE; in every other case the
        result points at the URL with the trailing /live removed.
        """
        match = re.match(self._VALID_URL, url)
        channel_id = match.group('id')
        base_url = match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        points_at_video = (
            page_type.startswith('video') and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if not points_at_video:
            return self.url_result(base_url)
        return self.url_result(video_id, YoutubeIE.ie_key())
2586
2587
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Purely declarative: only the URL pattern, naming and test cases are
    # defined here; no method is overridden in this class.
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2616
2617
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Matches a /watch link with an 11-character video id ('id' group) and,
    # when present further along in the same tag, its title attribute
    # ('title' group).
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2620
2621
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches'
    _SEARCH_KEY = 'ytsearch'
    # No practical upper bound seems to exist: searching for 'python', for
    # example, reports more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Return a playlist result holding at most n videos for query.

        Result pages are downloaded one after another, following the "Next"
        link of each page, until a page yields no videos, more than n videos
        have been gathered, or no "Next" link is found.

        Raises ExtractorError (expected) when a page carries a
        search-message element.
        """
        params = {'search_query': query.encode('utf-8')}
        params.update(self._EXTRA_QUERY_ARGS)
        page_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(params)

        collected = []
        for page_index in itertools.count(1):
            response = self._download_json(
                page_url, video_id='query "%s"' % query,
                note='Downloading page %s' % page_index,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            page_html = response[1]['body']['content']

            if 'class="search-message' in page_html:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            page_videos = list(self._process_page(page_html))
            collected.extend(page_videos)
            if not page_videos or len(collected) > n:
                break

            next_path = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                page_html, 'next link', default=None)
            if next_path is None:
                break
            page_url = compat_urlparse.urljoin('https://www.youtube.com/', next_path)

        # Slice only when there is a surplus; presumably n can be as large as
        # _MAX_RESULTS (infinity), which is not usable as a slice bound.
        if len(collected) > n:
            collected = collected[:n]
        return self.playlist_result(collected, query)
2670
2671
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent class, with an extra query argument that
    # requests ordering by upload date.
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
2677
2678
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist.

        The playlist title is the URL-decoded search query taken from url.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
2699
2700
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the parent extraction using the show's playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2718
2719
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield one result per distinct video id found in the feed.

        page is the HTML of the first feed page. Further portions are
        fetched through the "load more" link for as long as one is present
        and the latest portion contributed at least one unseen video id.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps each membership test constant-time, however long the
        # feed grows; yield order is still dictated by orderedSet(matches).
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for uniqueness and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and wrap its entries in a playlist result.

        The url argument is not used: the page address is built from
        _FEED_NAME.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2771
2772
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single-video result if there is one, else the 'WL' playlist."""
        # Both helpers return a pair; only the second element is used here.
        _ignored, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _ignored, playlist = self._extract_playlist('WL')
            return playlist
        return single_video
2792
2793
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Look up the favourites playlist id and hand it to YoutubePlaylist.

        The url argument is not used: the my_favorites page is always
        fetched and the first list= value in it is taken as the playlist id.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
2804
2805
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed configuration read by the YoutubeFeedsInfoExtractor base class
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
2811
2812
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed configuration read by the YoutubeFeedsInfoExtractor base class
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'

    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
2818
2819
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed configuration read by the YoutubeFeedsInfoExtractor base class
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2825
2826
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    # Matches watch/attribution_link URLs that end right after at most one
    # of the listed query parameters, i.e. without any v= video id.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at shell quoting."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
2874
2875
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    # Only matches watch URLs whose v= value is 1 to 10 characters long,
    # i.e. shorter than the 11 characters other patterns in this file expect.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)