]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youtube.py
debian/control: Update list of extractors in long description.
[youtubedl] / youtube_dl / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5
6 import itertools
7 import json
8 import os.path
9 import random
10 import re
11 import time
12 import traceback
13
14 from .common import InfoExtractor, SearchInfoExtractor
15 from ..jsinterp import JSInterpreter
16 from ..swfinterp import SWFInterpreter
17 from ..compat import (
18 compat_chr,
19 compat_kwargs,
20 compat_parse_qs,
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
25 compat_urlparse,
26 compat_str,
27 )
28 from ..utils import (
29 clean_html,
30 error_to_compat_str,
31 ExtractorError,
32 float_or_none,
33 get_element_by_attribute,
34 get_element_by_id,
35 int_or_none,
36 mimetype2ext,
37 orderedSet,
38 parse_codecs,
39 parse_duration,
40 qualities,
41 remove_quotes,
42 remove_start,
43 smuggle_url,
44 str_to_int,
45 try_get,
46 unescapeHTML,
47 unified_strdate,
48 unsmuggle_url,
49 uppercase_escape,
50 urlencode_postdata,
51 )
52
53
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of Google's JSON sign-in flow ('GlifWebSignIn')
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        # Force the English interface so HTML scraping regexes keep matching.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST a request in the undocumented 'f.req' format used by the
            # Google accounts sign-in flow; the response is JSON prefixed by
            # an anti-XSSI preamble which transform_source strips off.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Positional JSON structure expected by the lookup endpoint; the
        # meaning of most slots is unknown (reverse engineered).
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesized: % binds tighter than
            # if/else, so without parentheses the 'Unable to login:' prefix
            # was dropped for any message other than INCORRECT_ANSWER_ENTERED.
            warn(
                'Unable to login: %s' % ('Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesized for the same precedence reason as the
                    # 'Unable to login' warning above.
                    warn(
                        'Unable to finish TFA: %s' % ('Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through a page referencing myaccount
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Request the legacy (non-Polymer) UI so the HTML-scraping regexes
        # in the extractors keep working.
        kwargs.setdefault('query', {})['disable_polymer'] = 'true'
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
272
273
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from a page that paginates via a "Load more" button
    def _entries(self, page, playlist_id):
        """Yield entries from the initial page, following "Load more" links."""
        widget_html = page
        current_html = page
        page_num = 0
        while True:
            page_num += 1
            for entry in self._process_page(current_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                break

            payload = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            current_html = payload['content_html']
            if not current_html.strip():
                # A "Load more" button may be present even when there are
                # no further videos to fetch
                break
            widget_html = payload['load_more_widget_html']
296
297
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Shared logic for extractors that scrape video entries off a playlist page."""

    def _process_page(self, content):
        # Wrap each (id, title) pair into a url_result handled by YoutubeIE
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs matched by the subclass' _VIDEO_RE.

        Duplicate IDs are collapsed; a later match may fill in a title that
        an earlier duplicate was missing.
        """
        ids_in_page = []
        titles_in_page = []
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # Fixed: compare the 'index' group, not the video 'id' (an
            # 11-char video ID can never equal '0', so the guard never fired).
            if 'index' in mobj.groupdict() and mobj.group('index') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)
322
323
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Shared logic for extractors of pages that list multiple playlists."""

    def _process_page(self, content):
        # Collect unique playlist IDs from lockup-title links, keeping
        # their order of appearance on the page
        playlist_ids = orderedSet(re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content))
        for playlist_id in playlist_ids:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)
        page_title = self._og_search_title(page, fatal=False)
        entries = self._entries(page, playlist_id)
        return self.playlist_result(entries, playlist_id, page_title)
337
338
339 class YoutubeIE(YoutubeBaseInfoExtractor):
340 IE_DESC = 'YouTube.com'
341 _VALID_URL = r"""(?x)^
342 (
343 (?:https?://|//) # http(s):// or protocol-independent URL
344 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
345 (?:www\.)?deturl\.com/www\.youtube\.com/|
346 (?:www\.)?pwnyoutube\.com/|
347 (?:www\.)?hooktube\.com/|
348 (?:www\.)?yourepeat\.com/|
349 tube\.majestyc\.net/|
350 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
351 (?:.*?\#/)? # handle anchor (#/) redirect urls
352 (?: # the various things that can precede the ID:
353 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
354 |(?: # or the v= param in all its forms
355 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
356 (?:\?|\#!?) # the params delimiter ? or # or #!
357 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
358 v=
359 )
360 ))
361 |(?:
362 youtu\.be| # just youtu.be/xxxx
363 vid\.plus| # or vid.plus/xxxx
364 zwearz\.com/watch| # or zwearz.com/watch/xxxx
365 )/
366 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
367 )
368 )? # all until now is optional -> you can pass the naked ID
369 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
370 (?!.*?\blist=
371 (?:
372 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
373 WL # WL are handled by the watch later IE
374 )
375 )
376 (?(1).+)? # if we found the ID, everything can follow
377 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
378 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
379 _formats = {
380 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
381 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
382 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
383 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
384 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
385 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
386 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
387 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
388 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
389 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
390 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
391 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
392 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
393 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
394 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
395 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
396 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
397 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
398
399
400 # 3D videos
401 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
402 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
403 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
404 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
405 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
406 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
407 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
408
409 # Apple HTTP Live Streaming
410 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
411 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
412 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
413 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
414 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
415 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
416 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
417 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
418
419 # DASH mp4 video
420 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
421 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
422 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
423 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
424 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
425 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
426 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
427 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
428 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
429 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
430 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
431 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
432
433 # Dash mp4 audio
434 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
435 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
436 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
437 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
438 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
439 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
440 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
441
442 # Dash webm
443 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
444 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
445 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
446 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
447 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
448 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
449 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
450 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
451 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
452 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
453 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
454 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
455 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
456 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
457 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
458 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
459 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
460 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
461 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
462 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
463 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
464 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
465
466 # Dash webm audio
467 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
468 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
469
470 # Dash webm audio with opus inside
471 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
472 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
473 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
474
475 # RTMP (unnamed)
476 '_rtmp': {'protocol': 'rtmp'},
477 }
478 _SUBTITLE_FORMATS = ('ttml', 'vtt')
479
480 _GEO_BYPASS = False
481
482 IE_NAME = 'youtube'
483 _TESTS = [
484 {
485 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
486 'info_dict': {
487 'id': 'BaW_jenozKc',
488 'ext': 'mp4',
489 'title': 'youtube-dl test video "\'/\\Ƥā†­š•',
490 'uploader': 'Philipp Hagemeister',
491 'uploader_id': 'phihag',
492 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
493 'upload_date': '20121002',
494 'license': 'Standard YouTube License',
495 'description': 'test chars: "\'/\\Ƥā†­š•\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
496 'categories': ['Science & Technology'],
497 'tags': ['youtube-dl'],
498 'duration': 10,
499 'like_count': int,
500 'dislike_count': int,
501 'start_time': 1,
502 'end_time': 9,
503 }
504 },
505 {
506 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
507 'note': 'Test generic use_cipher_signature video (#897)',
508 'info_dict': {
509 'id': 'UxxajLWwzqY',
510 'ext': 'mp4',
511 'upload_date': '20120506',
512 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
513 'alt_title': 'I Love It (feat. Charli XCX)',
514 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
515 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
516 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
517 'iconic ep', 'iconic', 'love', 'it'],
518 'duration': 180,
519 'uploader': 'Icona Pop',
520 'uploader_id': 'IconaPop',
521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
522 'license': 'Standard YouTube License',
523 'creator': 'Icona Pop',
524 'track': 'I Love It (feat. Charli XCX)',
525 'artist': 'Icona Pop',
526 }
527 },
528 {
529 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
530 'note': 'Test VEVO video with age protection (#956)',
531 'info_dict': {
532 'id': '07FYdnEawAQ',
533 'ext': 'mp4',
534 'upload_date': '20130703',
535 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
536 'alt_title': 'Tunnel Vision',
537 'description': 'md5:64249768eec3bc4276236606ea996373',
538 'duration': 419,
539 'uploader': 'justintimberlakeVEVO',
540 'uploader_id': 'justintimberlakeVEVO',
541 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
542 'license': 'Standard YouTube License',
543 'creator': 'Justin Timberlake',
544 'track': 'Tunnel Vision',
545 'artist': 'Justin Timberlake',
546 'age_limit': 18,
547 }
548 },
549 {
550 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
551 'note': 'Embed-only video (#1746)',
552 'info_dict': {
553 'id': 'yZIXLfi8CZQ',
554 'ext': 'mp4',
555 'upload_date': '20120608',
556 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
557 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
558 'uploader': 'SET India',
559 'uploader_id': 'setindia',
560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
561 'license': 'Standard YouTube License',
562 'age_limit': 18,
563 }
564 },
565 {
566 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
567 'note': 'Use the first video ID in the URL',
568 'info_dict': {
569 'id': 'BaW_jenozKc',
570 'ext': 'mp4',
571 'title': 'youtube-dl test video "\'/\\Ƥā†­š•',
572 'uploader': 'Philipp Hagemeister',
573 'uploader_id': 'phihag',
574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
575 'upload_date': '20121002',
576 'license': 'Standard YouTube License',
577 'description': 'test chars: "\'/\\Ƥā†­š•\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
578 'categories': ['Science & Technology'],
579 'tags': ['youtube-dl'],
580 'duration': 10,
581 'like_count': int,
582 'dislike_count': int,
583 },
584 'params': {
585 'skip_download': True,
586 },
587 },
588 {
589 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
590 'note': '256k DASH audio (format 141) via DASH manifest',
591 'info_dict': {
592 'id': 'a9LDPn-MO4I',
593 'ext': 'm4a',
594 'upload_date': '20121002',
595 'uploader_id': '8KVIDEO',
596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
597 'description': '',
598 'uploader': '8KVIDEO',
599 'license': 'Standard YouTube License',
600 'title': 'UHDTV TEST 8K VIDEO.mp4'
601 },
602 'params': {
603 'youtube_include_dash_manifest': True,
604 'format': '141',
605 },
606 'skip': 'format 141 not served anymore',
607 },
608 # DASH manifest with encrypted signature
609 {
610 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
611 'info_dict': {
612 'id': 'IB3lcPjvWLA',
613 'ext': 'm4a',
614 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
615 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
616 'duration': 244,
617 'uploader': 'AfrojackVEVO',
618 'uploader_id': 'AfrojackVEVO',
619 'upload_date': '20131011',
620 'license': 'Standard YouTube License',
621 },
622 'params': {
623 'youtube_include_dash_manifest': True,
624 'format': '141/bestaudio[ext=m4a]',
625 },
626 },
627 # JS player signature function name containing $
628 {
629 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
630 'info_dict': {
631 'id': 'nfWlot6h_JM',
632 'ext': 'm4a',
633 'title': 'Taylor Swift - Shake It Off',
634 'alt_title': 'Shake It Off',
635 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
636 'duration': 242,
637 'uploader': 'TaylorSwiftVEVO',
638 'uploader_id': 'TaylorSwiftVEVO',
639 'upload_date': '20140818',
640 'license': 'Standard YouTube License',
641 'creator': 'Taylor Swift',
642 },
643 'params': {
644 'youtube_include_dash_manifest': True,
645 'format': '141/bestaudio[ext=m4a]',
646 },
647 },
648 # Controversy video
649 {
650 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
651 'info_dict': {
652 'id': 'T4XJQO3qol8',
653 'ext': 'mp4',
654 'duration': 219,
655 'upload_date': '20100909',
656 'uploader': 'TJ Kirk',
657 'uploader_id': 'TheAmazingAtheist',
658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
659 'license': 'Standard YouTube License',
660 'title': 'Burning Everyone\'s Koran',
661 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
662 }
663 },
664 # Normal age-gate video (No vevo, embed allowed)
665 {
666 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
667 'info_dict': {
668 'id': 'HtVdAasjOgU',
669 'ext': 'mp4',
670 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
671 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
672 'duration': 142,
673 'uploader': 'The Witcher',
674 'uploader_id': 'WitcherGame',
675 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
676 'upload_date': '20140605',
677 'license': 'Standard YouTube License',
678 'age_limit': 18,
679 },
680 },
681 # Age-gate video with encrypted signature
682 {
683 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
684 'info_dict': {
685 'id': '6kLq3WMV1nU',
686 'ext': 'webm',
687 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
688 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
689 'duration': 246,
690 'uploader': 'LloydVEVO',
691 'uploader_id': 'LloydVEVO',
692 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
693 'upload_date': '20110629',
694 'license': 'Standard YouTube License',
695 'age_limit': 18,
696 },
697 },
698 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
699 # YouTube Red ad is not captured for creator
700 {
701 'url': '__2ABJjxzNo',
702 'info_dict': {
703 'id': '__2ABJjxzNo',
704 'ext': 'mp4',
705 'duration': 266,
706 'upload_date': '20100430',
707 'uploader_id': 'deadmau5',
708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
709 'creator': 'deadmau5',
710 'description': 'md5:12c56784b8032162bb936a5f76d55360',
711 'uploader': 'deadmau5',
712 'license': 'Standard YouTube License',
713 'title': 'Deadmau5 - Some Chords (HD)',
714 'alt_title': 'Some Chords',
715 },
716 'expected_warnings': [
717 'DASH manifest missing',
718 ]
719 },
720 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
721 {
722 'url': 'lqQg6PlCWgI',
723 'info_dict': {
724 'id': 'lqQg6PlCWgI',
725 'ext': 'mp4',
726 'duration': 6085,
727 'upload_date': '20150827',
728 'uploader_id': 'olympic',
729 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
730 'license': 'Standard YouTube License',
731 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
732 'uploader': 'Olympic',
733 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
734 },
735 'params': {
736 'skip_download': 'requires avconv',
737 }
738 },
739 # Non-square pixels
740 {
741 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
742 'info_dict': {
743 'id': '_b-2C3KPAM0',
744 'ext': 'mp4',
745 'stretched_ratio': 16 / 9.,
746 'duration': 85,
747 'upload_date': '20110310',
748 'uploader_id': 'AllenMeow',
749 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
750 'description': 'made by Wacom from Korea | 字幕&åŠ ę²¹ę·»é†‹ by TY\'s Allen | ę„Ÿč¬heylisa00cavey1001同å­øē†±ęƒ…ęä¾›ę¢—åŠēæ»č­Æ',
751 'uploader': 'å­«į„‹į„…',
752 'license': 'Standard YouTube License',
753 'title': '[A-made] č®Šę…‹å¦å­—å¹•ē‰ˆ å¤Ŗ妍 ęˆ‘å°±ę˜Æ這ęØ£ēš„äŗŗ',
754 },
755 },
756 # url_encoded_fmt_stream_map is empty string
757 {
758 'url': 'qEJwOuvDf7I',
759 'info_dict': {
760 'id': 'qEJwOuvDf7I',
761 'ext': 'webm',
762 'title': 'ŠžŠ±ŃŃƒŠ¶Š“ŠµŠ½ŠøŠµ суŠ“ŠµŠ±Š½Š¾Š¹ ŠæрŠ°ŠŗтŠøŠŗŠø ŠæŠ¾ Š²Ń‹Š±Š¾Ń€Š°Š¼ 14 сŠµŠ½Ń‚яŠ±Ń€Ń 2014 Š³Š¾Š“Š° Š² Š”Š°Š½Šŗт-ŠŸŠµŃ‚ŠµŃ€Š±ŃƒŃ€Š³Šµ',
763 'description': '',
764 'upload_date': '20150404',
765 'uploader_id': 'spbelect',
766 'uploader': 'ŠŠ°Š±Š»ŃŽŠ“Š°Ń‚ŠµŠ»Šø ŠŸŠµŃ‚ŠµŃ€Š±ŃƒŃ€Š³Š°',
767 },
768 'params': {
769 'skip_download': 'requires avconv',
770 },
771 'skip': 'This live event has ended.',
772 },
773 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
774 {
775 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
776 'info_dict': {
777 'id': 'FIl7x6_3R5Y',
778 'ext': 'webm',
779 'title': 'md5:7b81415841e02ecd4313668cde88737a',
780 'description': 'md5:116377fd2963b81ec4ce64b542173306',
781 'duration': 220,
782 'upload_date': '20150625',
783 'uploader_id': 'dorappi2000',
784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
785 'uploader': 'dorappi2000',
786 'license': 'Standard YouTube License',
787 'formats': 'mincount:31',
788 },
789 'skip': 'not actual anymore',
790 },
791 # DASH manifest with segment_list
792 {
793 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
794 'md5': '8ce563a1d667b599d21064e982ab9e31',
795 'info_dict': {
796 'id': 'CsmdDsKjzN8',
797 'ext': 'mp4',
798 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
799 'uploader': 'Airtek',
800 'description': 'RetransmisiĆ³n en directo de la XVIII media maratĆ³n de Zaragoza.',
801 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
802 'license': 'Standard YouTube License',
803 'title': 'RetransmisiĆ³n XVIII Media maratĆ³n Zaragoza 2015',
804 },
805 'params': {
806 'youtube_include_dash_manifest': True,
807 'format': '135', # bestvideo
808 },
809 'skip': 'This live event has ended.',
810 },
811 {
812 # Multifeed videos (multiple cameras), URL is for Main Camera
813 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
814 'info_dict': {
815 'id': 'jqWvoWXjCVs',
816 'title': 'teamPGP: Rocket League Noob Stream',
817 'description': 'md5:dc7872fb300e143831327f1bae3af010',
818 },
819 'playlist': [{
820 'info_dict': {
821 'id': 'jqWvoWXjCVs',
822 'ext': 'mp4',
823 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
824 'description': 'md5:dc7872fb300e143831327f1bae3af010',
825 'duration': 7335,
826 'upload_date': '20150721',
827 'uploader': 'Beer Games Beer',
828 'uploader_id': 'beergamesbeer',
829 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
830 'license': 'Standard YouTube License',
831 },
832 }, {
833 'info_dict': {
834 'id': '6h8e8xoXJzg',
835 'ext': 'mp4',
836 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
837 'description': 'md5:dc7872fb300e143831327f1bae3af010',
838 'duration': 7337,
839 'upload_date': '20150721',
840 'uploader': 'Beer Games Beer',
841 'uploader_id': 'beergamesbeer',
842 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
843 'license': 'Standard YouTube License',
844 },
845 }, {
846 'info_dict': {
847 'id': 'PUOgX5z9xZw',
848 'ext': 'mp4',
849 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
850 'description': 'md5:dc7872fb300e143831327f1bae3af010',
851 'duration': 7337,
852 'upload_date': '20150721',
853 'uploader': 'Beer Games Beer',
854 'uploader_id': 'beergamesbeer',
855 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
856 'license': 'Standard YouTube License',
857 },
858 }, {
859 'info_dict': {
860 'id': 'teuwxikvS5k',
861 'ext': 'mp4',
862 'title': 'teamPGP: Rocket League Noob Stream (zim)',
863 'description': 'md5:dc7872fb300e143831327f1bae3af010',
864 'duration': 7334,
865 'upload_date': '20150721',
866 'uploader': 'Beer Games Beer',
867 'uploader_id': 'beergamesbeer',
868 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
869 'license': 'Standard YouTube License',
870 },
871 }],
872 'params': {
873 'skip_download': True,
874 },
875 },
876 {
877 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
878 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
879 'info_dict': {
880 'id': 'gVfLd0zydlo',
881 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
882 },
883 'playlist_count': 2,
884 'skip': 'Not multifeed anymore',
885 },
886 {
887 'url': 'https://vid.plus/FlRa-iH7PGw',
888 'only_matching': True,
889 },
890 {
891 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
892 'only_matching': True,
893 },
894 {
895 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
896 # Also tests cut-off URL expansion in video description (see
897 # https://github.com/rg3/youtube-dl/issues/1892,
898 # https://github.com/rg3/youtube-dl/issues/8164)
899 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
900 'info_dict': {
901 'id': 'lsguqyKfVQg',
902 'ext': 'mp4',
903 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
904 'alt_title': 'Dark Walk - Position Music',
905 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
906 'duration': 133,
907 'upload_date': '20151119',
908 'uploader_id': 'IronSoulElf',
909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
910 'uploader': 'IronSoulElf',
911 'license': 'Standard YouTube License',
912 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
913 'track': 'Dark Walk - Position Music',
914 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
915 },
916 'params': {
917 'skip_download': True,
918 },
919 },
920 {
921 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
922 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
923 'only_matching': True,
924 },
925 {
926 # Video with yt:stretch=17:0
927 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
928 'info_dict': {
929 'id': 'Q39EVAstoRM',
930 'ext': 'mp4',
931 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
932 'description': 'md5:ee18a25c350637c8faff806845bddee9',
933 'upload_date': '20151107',
934 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
935 'uploader': 'CH GAMER DROID',
936 },
937 'params': {
938 'skip_download': True,
939 },
940 'skip': 'This video does not exist.',
941 },
942 {
943 # Video licensed under Creative Commons
944 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
945 'info_dict': {
946 'id': 'M4gD1WSo5mA',
947 'ext': 'mp4',
948 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
949 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
950 'duration': 721,
951 'upload_date': '20150127',
952 'uploader_id': 'BerkmanCenter',
953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
954 'uploader': 'The Berkman Klein Center for Internet & Society',
955 'license': 'Creative Commons Attribution license (reuse allowed)',
956 },
957 'params': {
958 'skip_download': True,
959 },
960 },
961 {
962 # Channel-like uploader_url
963 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
964 'info_dict': {
965 'id': 'eQcmzGIKrzg',
966 'ext': 'mp4',
967 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
968 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
969 'duration': 4060,
970 'upload_date': '20151119',
971 'uploader': 'Bernie Sanders',
972 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
973 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
974 'license': 'Creative Commons Attribution license (reuse allowed)',
975 },
976 'params': {
977 'skip_download': True,
978 },
979 },
980 {
981 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
982 'only_matching': True,
983 },
984 {
985 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
986 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
987 'only_matching': True,
988 },
989 {
990 # Rental video preview
991 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
992 'info_dict': {
993 'id': 'uGpuVWrhIzE',
994 'ext': 'mp4',
995 'title': 'Piku - Trailer',
996 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
997 'upload_date': '20150811',
998 'uploader': 'FlixMatrix',
999 'uploader_id': 'FlixMatrixKaravan',
1000 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1001 'license': 'Standard YouTube License',
1002 },
1003 'params': {
1004 'skip_download': True,
1005 },
1006 'skip': 'This video is not available.',
1007 },
1008 {
1009 # YouTube Red video with episode data
1010 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1011 'info_dict': {
1012 'id': 'iqKdEhx-dD4',
1013 'ext': 'mp4',
1014 'title': 'Isolation - Mind Field (Ep 1)',
1015 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1016 'duration': 2085,
1017 'upload_date': '20170118',
1018 'uploader': 'Vsauce',
1019 'uploader_id': 'Vsauce',
1020 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1021 'license': 'Standard YouTube License',
1022 'series': 'Mind Field',
1023 'season_number': 1,
1024 'episode_number': 1,
1025 },
1026 'params': {
1027 'skip_download': True,
1028 },
1029 'expected_warnings': [
1030 'Skipping DASH manifest',
1031 ],
1032 },
1033 {
1034 # The following content has been identified by the YouTube community
1035 # as inappropriate or offensive to some audiences.
1036 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1037 'info_dict': {
1038 'id': '6SJNVb0GnPI',
1039 'ext': 'mp4',
1040 'title': 'Race Differences in Intelligence',
1041 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1042 'duration': 965,
1043 'upload_date': '20140124',
1044 'uploader': 'New Century Foundation',
1045 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1046 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1047 'license': 'Standard YouTube License',
1048 },
1049 'params': {
1050 'skip_download': True,
1051 },
1052 },
1053 {
1054 # itag 212
1055 'url': '1t24XAntNCY',
1056 'only_matching': True,
1057 },
1058 {
1059 # geo restricted to JP
1060 'url': 'sJL6WA-aGkQ',
1061 'only_matching': True,
1062 },
1063 {
1064 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1065 'only_matching': True,
1066 },
1067 ]
1068
    def __init__(self, *args, **kwargs):
        """Initialise the extractor and create an empty signature-function cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature cache id) -> decryption function.
        # Filled lazily by _decrypt_signature() so a player is only
        # downloaded and interpreted once per process.
        self._player_cache = {}
1072
1073 def report_video_info_webpage_download(self, video_id):
1074 """Report attempt to download video info webpage."""
1075 self.to_screen('%s: Downloading video info webpage' % video_id)
1076
1077 def report_information_extraction(self, video_id):
1078 """Report attempt to extract video information."""
1079 self.to_screen('%s: Extracting video information' % video_id)
1080
    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available for this video."""
        # NOTE: 'format' shadows the builtin name; kept for interface
        # compatibility with existing callers.
        self.to_screen('%s: Format %s not available' % (video_id, format))
1084
    def report_rtmp_download(self):
        """Inform the user that the download will use the RTMP protocol."""
        self.to_screen('RTMP download detected')
1088
1089 def _signature_cache_id(self, example_sig):
1090 """ Return a string representation of a signature """
1091 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1092
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Download the player code and build a signature-decryption function.

        Returns a callable mapping a scrambled signature string to its clear
        form.  The derived character-index mapping is memoised in the
        filesystem cache, keyed by player type/id and the signature layout.
        """
        # Identify the player flavour (js/swf) and version id from its URL.
        id_m = re.match(
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
            player_url)
        if not id_m:
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache filename; ensure it has no path parts.
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices: applying the signature
            # function amounts to picking input characters by index.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Run the extracted function on a probe string whose characters encode
        # their own position, so the output reveals which source index lands
        # at each output slot; persist that index list for the cache.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1138
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Runs *func* on a probe string, recovers the character-index mapping it
        applies, and renders it as compact ``s[...]`` slice expressions (used
        for the --youtube-print-sig-code option).
        """
        def gen_sig_code(idxs):
            # Compress a list of source indices into expressions, merging
            # consecutive runs with step +1/-1 into a single slice.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it while the step matches,
                    # otherwise flush the accumulated slice.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two adjacent indices open a new sliceable run.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit whatever the loop left pending (single index or open run).
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1177
1178 def _parse_sig_js(self, jscode):
1179 funcname = self._search_regex(
1180 (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1181 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1182 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1183 r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1184 jscode, 'Initial JS player signature function name', group='sig')
1185
1186 jsi = JSInterpreter(jscode)
1187 initial_function = jsi.extract_function(funcname)
1188 return lambda s: initial_function([s])
1189
1190 def _parse_sig_swf(self, file_contents):
1191 swfi = SWFInterpreter(file_contents)
1192 TARGET_CLASSNAME = 'SignatureDecipher'
1193 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1194 initial_function = swfi.extract_function(searched_class, 'decipher')
1195 return lambda s: initial_function([s])
1196
1197 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1198 """Turn the encrypted s field into a working signature"""
1199
1200 if player_url is None:
1201 raise ExtractorError('Cannot decrypt signature without player_url')
1202
1203 if player_url.startswith('//'):
1204 player_url = 'https:' + player_url
1205 elif not re.match(r'https?://', player_url):
1206 player_url = compat_urlparse.urljoin(
1207 'https://www.youtube.com', player_url)
1208 try:
1209 player_id = (player_url, self._signature_cache_id(s))
1210 if player_id not in self._player_cache:
1211 func = self._extract_signature_function(
1212 video_id, player_url, s
1213 )
1214 self._player_cache[player_id] = func
1215 func = self._player_cache[player_id]
1216 if self._downloader.params.get('youtube_print_sig_code'):
1217 self._print_sig_code(func, s)
1218 return func(s)
1219 except Exception as e:
1220 tb = traceback.format_exc()
1221 raise ExtractorError(
1222 'Signature extraction failed: ' + tb, cause=e)
1223
1224 def _get_subtitles(self, video_id, webpage):
1225 try:
1226 subs_doc = self._download_xml(
1227 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1228 video_id, note=False)
1229 except ExtractorError as err:
1230 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1231 return {}
1232
1233 sub_lang_list = {}
1234 for track in subs_doc.findall('track'):
1235 lang = track.attrib['lang_code']
1236 if lang in sub_lang_list:
1237 continue
1238 sub_formats = []
1239 for ext in self._SUBTITLE_FORMATS:
1240 params = compat_urllib_parse_urlencode({
1241 'lang': lang,
1242 'v': video_id,
1243 'fmt': ext,
1244 'name': track.attrib['name'].encode('utf-8'),
1245 })
1246 sub_formats.append({
1247 'url': 'https://www.youtube.com/api/timedtext?' + params,
1248 'ext': ext,
1249 })
1250 sub_lang_list[lang] = sub_formats
1251 if not sub_lang_list:
1252 self._downloader.report_warning('video doesn\'t have subtitles')
1253 return {}
1254 return sub_lang_list
1255
1256 def _get_ytplayer_config(self, video_id, webpage):
1257 patterns = (
1258 # User data may contain arbitrary character sequences that may affect
1259 # JSON extraction with regex, e.g. when '};' is contained the second
1260 # regex won't capture the whole JSON. Yet working around by trying more
1261 # concrete regex first keeping in mind proper quoted string handling
1262 # to be implemented in future that will replace this workaround (see
1263 # https://github.com/rg3/youtube-dl/issues/7468,
1264 # https://github.com/rg3/youtube-dl/pull/7599)
1265 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1266 r';ytplayer\.config\s*=\s*({.+?});',
1267 )
1268 config = self._search_regex(
1269 patterns, webpage, 'ytplayer.config', default=None)
1270 if config:
1271 return self._parse_json(
1272 uppercase_escape(config), video_id, fatal=False)
1273
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                # Oldest path: the player config exposes a tts url whose
                # listing endpoint enumerates available (translated) tracks.
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build a captions dict by rewriting sub_url's query string
                # for each requested target language and subtitle format.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1375
1376 def _mark_watched(self, video_id, video_info):
1377 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1378 if not playback_url:
1379 return
1380 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1381 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1382
1383 # cpn generation algorithm is reverse engineered from base.js.
1384 # In fact it works even with dummy cpn.
1385 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1386 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1387
1388 qs.update({
1389 'ver': ['2'],
1390 'cpn': [cpn],
1391 })
1392 playback_url = compat_urlparse.urlunparse(
1393 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1394
1395 self._download_webpage(
1396 playback_url, video_id, 'Marking watched',
1397 'Unable to mark watched', fatal=False)
1398
1399 @staticmethod
1400 def _extract_urls(webpage):
1401 # Embedded YouTube player
1402 entries = [
1403 unescapeHTML(mobj.group('url'))
1404 for mobj in re.finditer(r'''(?x)
1405 (?:
1406 <iframe[^>]+?src=|
1407 data-video-url=|
1408 <embed[^>]+?src=|
1409 embedSWF\(?:\s*|
1410 <object[^>]+data=|
1411 new\s+SWFObject\(
1412 )
1413 (["\'])
1414 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1415 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1416 \1''', webpage)]
1417
1418 # lazyYT YouTube embed
1419 entries.extend(list(map(
1420 unescapeHTML,
1421 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1422
1423 # Wordpress "YouTube Video Importer" plugin
1424 matches = re.findall(r'''(?x)<div[^>]+
1425 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1426 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1427 entries.extend(m[-1] for m in matches)
1428
1429 return entries
1430
1431 @staticmethod
1432 def _extract_url(webpage):
1433 urls = YoutubeIE._extract_urls(webpage)
1434 return urls[0] if urls else None
1435
1436 @classmethod
1437 def extract_id(cls, url):
1438 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1439 if mobj is None:
1440 raise ExtractorError('Invalid URL: %s' % url)
1441 video_id = mobj.group(2)
1442 return video_id
1443
1444 def _extract_annotations(self, video_id):
1445 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1446 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1447
1448 @staticmethod
1449 def _extract_chapters(description, duration):
1450 if not description:
1451 return None
1452 chapter_lines = re.findall(
1453 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1454 description)
1455 if not chapter_lines:
1456 return None
1457 chapters = []
1458 for next_num, (chapter_line, time_point) in enumerate(
1459 chapter_lines, start=1):
1460 start_time = parse_duration(time_point)
1461 if start_time is None:
1462 continue
1463 if start_time > duration:
1464 break
1465 end_time = (duration if next_num == len(chapter_lines)
1466 else parse_duration(chapter_lines[next_num][1]))
1467 if end_time is None:
1468 continue
1469 if end_time > duration:
1470 end_time = duration
1471 if start_time > end_time:
1472 break
1473 chapter_title = re.sub(
1474 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1475 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1476 chapters.append({
1477 'start_time': start_time,
1478 'end_time': end_time,
1479 'title': chapter_title,
1480 })
1481 return chapters
1482
1483 def _real_extract(self, url):
1484 url, smuggled_data = unsmuggle_url(url, {})
1485
1486 proto = (
1487 'http' if self._downloader.params.get('prefer_insecure', False)
1488 else 'https')
1489
1490 start_time = None
1491 end_time = None
1492 parsed_url = compat_urllib_parse_urlparse(url)
1493 for component in [parsed_url.fragment, parsed_url.query]:
1494 query = compat_parse_qs(component)
1495 if start_time is None and 't' in query:
1496 start_time = parse_duration(query['t'][0])
1497 if start_time is None and 'start' in query:
1498 start_time = parse_duration(query['start'][0])
1499 if end_time is None and 'end' in query:
1500 end_time = parse_duration(query['end'][0])
1501
1502 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1503 mobj = re.search(self._NEXT_URL_RE, url)
1504 if mobj:
1505 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1506 video_id = self.extract_id(url)
1507
1508 # Get video webpage
1509 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1510 video_webpage = self._download_webpage(url, video_id)
1511
1512 # Attempt to extract SWF player URL
1513 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1514 if mobj is not None:
1515 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1516 else:
1517 player_url = None
1518
1519 dash_mpds = []
1520
1521 def add_dash_mpd(video_info):
1522 dash_mpd = video_info.get('dashmpd')
1523 if dash_mpd and dash_mpd[0] not in dash_mpds:
1524 dash_mpds.append(dash_mpd[0])
1525
1526 is_live = None
1527 view_count = None
1528
1529 def extract_view_count(v_info):
1530 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1531
1532 # Get video info
1533 embed_webpage = None
1534 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1535 age_gate = True
1536 # We simulate the access to the video from www.youtube.com/v/{video_id}
1537 # this can be viewed without login into Youtube
1538 url = proto + '://www.youtube.com/embed/%s' % video_id
1539 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1540 data = compat_urllib_parse_urlencode({
1541 'video_id': video_id,
1542 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1543 'sts': self._search_regex(
1544 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1545 })
1546 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1547 video_info_webpage = self._download_webpage(
1548 video_info_url, video_id,
1549 note='Refetching age-gated info webpage',
1550 errnote='unable to download video info webpage')
1551 video_info = compat_parse_qs(video_info_webpage)
1552 add_dash_mpd(video_info)
1553 else:
1554 age_gate = False
1555 video_info = None
1556 sts = None
1557 # Try looking directly into the video webpage
1558 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1559 if ytplayer_config:
1560 args = ytplayer_config['args']
1561 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1562 # Convert to the same format returned by compat_parse_qs
1563 video_info = dict((k, [v]) for k, v in args.items())
1564 add_dash_mpd(video_info)
1565 # Rental video is not rented but preview is available (e.g.
1566 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1567 # https://github.com/rg3/youtube-dl/issues/10532)
1568 if not video_info and args.get('ypc_vid'):
1569 return self.url_result(
1570 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1571 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1572 is_live = True
1573 sts = ytplayer_config.get('sts')
1574 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1575 # We also try looking in get_video_info since it may contain different dashmpd
1576 # URL that points to a DASH manifest with possibly different itag set (some itags
1577 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1578 # manifest pointed by get_video_info's dashmpd).
1579 # The general idea is to take a union of itags of both DASH manifests (for example
1580 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1581 self.report_video_info_webpage_download(video_id)
1582 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1583 query = {
1584 'video_id': video_id,
1585 'ps': 'default',
1586 'eurl': '',
1587 'gl': 'US',
1588 'hl': 'en',
1589 }
1590 if el:
1591 query['el'] = el
1592 if sts:
1593 query['sts'] = sts
1594 video_info_webpage = self._download_webpage(
1595 '%s://www.youtube.com/get_video_info' % proto,
1596 video_id, note=False,
1597 errnote='unable to download video info webpage',
1598 fatal=False, query=query)
1599 if not video_info_webpage:
1600 continue
1601 get_video_info = compat_parse_qs(video_info_webpage)
1602 add_dash_mpd(get_video_info)
1603 if view_count is None:
1604 view_count = extract_view_count(get_video_info)
1605 if not video_info:
1606 video_info = get_video_info
1607 if 'token' in get_video_info:
1608 # Different get_video_info requests may report different results, e.g.
1609 # some may report video unavailability, but some may serve it without
1610 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1611 # the original webpage as well as el=info and el=embedded get_video_info
1612 # requests report video unavailability due to geo restriction while
1613 # el=detailpage succeeds and returns valid data). This is probably
1614 # due to YouTube measures against IP ranges of hosting providers.
1615 # Working around by preferring the first succeeded video_info containing
1616 # the token if no such video_info yet was found.
1617 if 'token' not in video_info:
1618 video_info = get_video_info
1619 break
1620
1621 def extract_unavailable_message():
1622 return self._html_search_regex(
1623 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1624 video_webpage, 'unavailable message', default=None)
1625
1626 if 'token' not in video_info:
1627 if 'reason' in video_info:
1628 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1629 regions_allowed = self._html_search_meta(
1630 'regionsAllowed', video_webpage, default=None)
1631 countries = regions_allowed.split(',') if regions_allowed else None
1632 self.raise_geo_restricted(
1633 msg=video_info['reason'][0], countries=countries)
1634 reason = video_info['reason'][0]
1635 if 'Invalid parameters' in reason:
1636 unavailable_message = extract_unavailable_message()
1637 if unavailable_message:
1638 reason = unavailable_message
1639 raise ExtractorError(
1640 'YouTube said: %s' % reason,
1641 expected=True, video_id=video_id)
1642 else:
1643 raise ExtractorError(
1644 '"token" parameter not in video info for unknown reason',
1645 video_id=video_id)
1646
1647 # title
1648 if 'title' in video_info:
1649 video_title = video_info['title'][0]
1650 else:
1651 self._downloader.report_warning('Unable to extract video title')
1652 video_title = '_'
1653
1654 # description
1655 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1656 if video_description:
1657
1658 def replace_url(m):
1659 redir_url = compat_urlparse.urljoin(url, m.group(1))
1660 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1661 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1662 qs = compat_parse_qs(parsed_redir_url.query)
1663 q = qs.get('q')
1664 if q and q[0]:
1665 return q[0]
1666 return redir_url
1667
1668 description_original = video_description = re.sub(r'''(?x)
1669 <a\s+
1670 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1671 (?:title|href)="([^"]+)"\s+
1672 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1673 class="[^"]*"[^>]*>
1674 [^<]+\.{3}\s*
1675 </a>
1676 ''', replace_url, video_description)
1677 video_description = clean_html(video_description)
1678 else:
1679 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1680 if fd_mobj:
1681 video_description = unescapeHTML(fd_mobj.group(1))
1682 else:
1683 video_description = ''
1684
1685 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1686 if not self._downloader.params.get('noplaylist'):
1687 entries = []
1688 feed_ids = []
1689 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
1690 for feed in multifeed_metadata_list.split(','):
1691 # Unquote should take place before split on comma (,) since textual
1692 # fields may contain comma as well (see
1693 # https://github.com/rg3/youtube-dl/issues/8536)
1694 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1695 entries.append({
1696 '_type': 'url_transparent',
1697 'ie_key': 'Youtube',
1698 'url': smuggle_url(
1699 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1700 {'force_singlefeed': True}),
1701 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1702 })
1703 feed_ids.append(feed_data['id'][0])
1704 self.to_screen(
1705 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1706 % (', '.join(feed_ids), video_id))
1707 return self.playlist_result(entries, video_id, video_title, video_description)
1708 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1709
1710 if view_count is None:
1711 view_count = extract_view_count(video_info)
1712
1713 # Check for "rental" videos
1714 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1715 raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
1716
1717 def _extract_filesize(media_url):
1718 return int_or_none(self._search_regex(
1719 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1720
1721 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1722 self.report_rtmp_download()
1723 formats = [{
1724 'format_id': '_rtmp',
1725 'protocol': 'rtmp',
1726 'url': video_info['conn'][0],
1727 'player_url': player_url,
1728 }]
1729 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1730 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1731 if 'rtmpe%3Dyes' in encoded_url_map:
1732 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1733 formats_spec = {}
1734 fmt_list = video_info.get('fmt_list', [''])[0]
1735 if fmt_list:
1736 for fmt in fmt_list.split(','):
1737 spec = fmt.split('/')
1738 if len(spec) > 1:
1739 width_height = spec[1].split('x')
1740 if len(width_height) == 2:
1741 formats_spec[spec[0]] = {
1742 'resolution': spec[1],
1743 'width': int_or_none(width_height[0]),
1744 'height': int_or_none(width_height[1]),
1745 }
1746 q = qualities(['small', 'medium', 'hd720'])
1747 formats = []
1748 for url_data_str in encoded_url_map.split(','):
1749 url_data = compat_parse_qs(url_data_str)
1750 if 'itag' not in url_data or 'url' not in url_data:
1751 continue
1752 format_id = url_data['itag'][0]
1753 url = url_data['url'][0]
1754
1755 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1756 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1757 jsplayer_url_json = self._search_regex(
1758 ASSETS_RE,
1759 embed_webpage if age_gate else video_webpage,
1760 'JS player URL (1)', default=None)
1761 if not jsplayer_url_json and not age_gate:
1762 # We need the embed website after all
1763 if embed_webpage is None:
1764 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1765 embed_webpage = self._download_webpage(
1766 embed_url, video_id, 'Downloading embed webpage')
1767 jsplayer_url_json = self._search_regex(
1768 ASSETS_RE, embed_webpage, 'JS player URL')
1769
1770 player_url = json.loads(jsplayer_url_json)
1771 if player_url is None:
1772 player_url_json = self._search_regex(
1773 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1774 video_webpage, 'age gate player URL')
1775 player_url = json.loads(player_url_json)
1776
1777 if 'sig' in url_data:
1778 url += '&signature=' + url_data['sig'][0]
1779 elif 's' in url_data:
1780 encrypted_sig = url_data['s'][0]
1781
1782 if self._downloader.params.get('verbose'):
1783 if player_url is None:
1784 player_version = 'unknown'
1785 player_desc = 'unknown'
1786 else:
1787 if player_url.endswith('swf'):
1788 player_version = self._search_regex(
1789 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1790 'flash player', fatal=False)
1791 player_desc = 'flash player %s' % player_version
1792 else:
1793 player_version = self._search_regex(
1794 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1795 r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
1796 player_url,
1797 'html5 player', fatal=False)
1798 player_desc = 'html5 player %s' % player_version
1799
1800 parts_sizes = self._signature_cache_id(encrypted_sig)
1801 self.to_screen('{%s} signature length %s, %s' %
1802 (format_id, parts_sizes, player_desc))
1803
1804 signature = self._decrypt_signature(
1805 encrypted_sig, video_id, player_url, age_gate)
1806 url += '&signature=' + signature
1807 if 'ratebypass' not in url:
1808 url += '&ratebypass=yes'
1809
1810 dct = {
1811 'format_id': format_id,
1812 'url': url,
1813 'player_url': player_url,
1814 }
1815 if format_id in self._formats:
1816 dct.update(self._formats[format_id])
1817 if format_id in formats_spec:
1818 dct.update(formats_spec[format_id])
1819
1820 # Some itags are not included in DASH manifest thus corresponding formats will
1821 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1822 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1823 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1824 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1825
1826 filesize = int_or_none(url_data.get(
1827 'clen', [None])[0]) or _extract_filesize(url)
1828
1829 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1830
1831 more_fields = {
1832 'filesize': filesize,
1833 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1834 'width': width,
1835 'height': height,
1836 'fps': int_or_none(url_data.get('fps', [None])[0]),
1837 'format_note': quality,
1838 'quality': q(quality),
1839 }
1840 for key, value in more_fields.items():
1841 if value:
1842 dct[key] = value
1843 type_ = url_data.get('type', [None])[0]
1844 if type_:
1845 type_split = type_.split(';')
1846 kind_ext = type_split[0].split('/')
1847 if len(kind_ext) == 2:
1848 kind, _ = kind_ext
1849 dct['ext'] = mimetype2ext(type_split[0])
1850 if kind in ('audio', 'video'):
1851 codecs = None
1852 for mobj in re.finditer(
1853 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1854 if mobj.group('key') == 'codecs':
1855 codecs = mobj.group('val')
1856 break
1857 if codecs:
1858 dct.update(parse_codecs(codecs))
1859 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1860 dct['downloader_options'] = {
1861 # Youtube throttles chunks >~10M
1862 'http_chunk_size': 10485760,
1863 }
1864 formats.append(dct)
1865 elif video_info.get('hlsvp'):
1866 manifest_url = video_info['hlsvp'][0]
1867 formats = []
1868 m3u8_formats = self._extract_m3u8_formats(
1869 manifest_url, video_id, 'mp4', fatal=False)
1870 for a_format in m3u8_formats:
1871 itag = self._search_regex(
1872 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1873 if itag:
1874 a_format['format_id'] = itag
1875 if itag in self._formats:
1876 dct = self._formats[itag].copy()
1877 dct.update(a_format)
1878 a_format = dct
1879 a_format['player_url'] = player_url
1880 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1881 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1882 formats.append(a_format)
1883 else:
1884 error_message = clean_html(video_info.get('reason', [None])[0])
1885 if not error_message:
1886 error_message = extract_unavailable_message()
1887 if error_message:
1888 raise ExtractorError(error_message, expected=True)
1889 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1890
1891 # uploader
1892 video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1893 if video_uploader:
1894 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1895 else:
1896 self._downloader.report_warning('unable to extract uploader name')
1897
1898 # uploader_id
1899 video_uploader_id = None
1900 video_uploader_url = None
1901 mobj = re.search(
1902 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1903 video_webpage)
1904 if mobj is not None:
1905 video_uploader_id = mobj.group('uploader_id')
1906 video_uploader_url = mobj.group('uploader_url')
1907 else:
1908 self._downloader.report_warning('unable to extract uploader nickname')
1909
1910 # thumbnail image
1911 # We try first to get a high quality image:
1912 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1913 video_webpage, re.DOTALL)
1914 if m_thumb is not None:
1915 video_thumbnail = m_thumb.group(1)
1916 elif 'thumbnail_url' not in video_info:
1917 self._downloader.report_warning('unable to extract video thumbnail')
1918 video_thumbnail = None
1919 else: # don't panic if we can't find it
1920 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1921
1922 # upload date
1923 upload_date = self._html_search_meta(
1924 'datePublished', video_webpage, 'upload date', default=None)
1925 if not upload_date:
1926 upload_date = self._search_regex(
1927 [r'(?s)id="eow-date.*?>(.*?)</span>',
1928 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1929 video_webpage, 'upload date', default=None)
1930 upload_date = unified_strdate(upload_date)
1931
1932 video_license = self._html_search_regex(
1933 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1934 video_webpage, 'license', default=None)
1935
1936 m_music = re.search(
1937 r'''(?x)
1938 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1939 <ul[^>]*>\s*
1940 <li>(?P<title>.+?)
1941 by (?P<creator>.+?)
1942 (?:
1943 \(.+?\)|
1944 <a[^>]*
1945 (?:
1946 \bhref=["\']/red[^>]*>| # drop possible
1947 >\s*Listen ad-free with YouTube Red # YouTube Red ad
1948 )
1949 .*?
1950 )?</li
1951 ''',
1952 video_webpage)
1953 if m_music:
1954 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1955 video_creator = clean_html(m_music.group('creator'))
1956 else:
1957 video_alt_title = video_creator = None
1958
1959 def extract_meta(field):
1960 return self._html_search_regex(
1961 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1962 video_webpage, field, default=None)
1963
1964 track = extract_meta('Song')
1965 artist = extract_meta('Artist')
1966
1967 m_episode = re.search(
1968 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*ā€¢\s*E(?P<episode>\d+)</span>',
1969 video_webpage)
1970 if m_episode:
1971 series = m_episode.group('series')
1972 season_number = int(m_episode.group('season'))
1973 episode_number = int(m_episode.group('episode'))
1974 else:
1975 series = season_number = episode_number = None
1976
1977 m_cat_container = self._search_regex(
1978 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1979 video_webpage, 'categories', default=None)
1980 if m_cat_container:
1981 category = self._html_search_regex(
1982 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1983 default=None)
1984 video_categories = None if category is None else [category]
1985 else:
1986 video_categories = None
1987
1988 video_tags = [
1989 unescapeHTML(m.group('content'))
1990 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1991
1992 def _extract_count(count_name):
1993 return str_to_int(self._search_regex(
1994 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1995 % re.escape(count_name),
1996 video_webpage, count_name, default=None))
1997
1998 like_count = _extract_count('like')
1999 dislike_count = _extract_count('dislike')
2000
2001 # subtitles
2002 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2003 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2004
2005 video_duration = try_get(
2006 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2007 if not video_duration:
2008 video_duration = parse_duration(self._html_search_meta(
2009 'duration', video_webpage, 'video duration'))
2010
2011 # annotations
2012 video_annotations = None
2013 if self._downloader.params.get('writeannotations', False):
2014 video_annotations = self._extract_annotations(video_id)
2015
2016 chapters = self._extract_chapters(description_original, video_duration)
2017
2018 # Look for the DASH manifest
2019 if self._downloader.params.get('youtube_include_dash_manifest', True):
2020 dash_mpd_fatal = True
2021 for mpd_url in dash_mpds:
2022 dash_formats = {}
2023 try:
2024 def decrypt_sig(mobj):
2025 s = mobj.group(1)
2026 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2027 return '/signature/%s' % dec_s
2028
2029 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2030
2031 for df in self._extract_mpd_formats(
2032 mpd_url, video_id, fatal=dash_mpd_fatal,
2033 formats_dict=self._formats):
2034 if not df.get('filesize'):
2035 df['filesize'] = _extract_filesize(df['url'])
2036 # Do not overwrite DASH format found in some previous DASH manifest
2037 if df['format_id'] not in dash_formats:
2038 dash_formats[df['format_id']] = df
2039 # Additional DASH manifests may end up in HTTP Error 403 therefore
2040 # allow them to fail without bug report message if we already have
2041 # some DASH manifest succeeded. This is temporary workaround to reduce
2042 # burst of bug reports until we figure out the reason and whether it
2043 # can be fixed at all.
2044 dash_mpd_fatal = False
2045 except (ExtractorError, KeyError) as e:
2046 self.report_warning(
2047 'Skipping DASH manifest: %r' % e, video_id)
2048 if dash_formats:
2049 # Remove the formats we found through non-DASH, they
2050 # contain less info and it can be wrong, because we use
2051 # fixed values (for example the resolution). See
2052 # https://github.com/rg3/youtube-dl/issues/5774 for an
2053 # example.
2054 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2055 formats.extend(dash_formats.values())
2056
2057 # Check for malformed aspect ratio
2058 stretched_m = re.search(
2059 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2060 video_webpage)
2061 if stretched_m:
2062 w = float(stretched_m.group('w'))
2063 h = float(stretched_m.group('h'))
2064 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2065 # We will only process correct ratios.
2066 if w > 0 and h > 0:
2067 ratio = w / h
2068 for f in formats:
2069 if f.get('vcodec') != 'none':
2070 f['stretched_ratio'] = ratio
2071
2072 self._sort_formats(formats)
2073
2074 self.mark_watched(video_id, video_info)
2075
2076 return {
2077 'id': video_id,
2078 'uploader': video_uploader,
2079 'uploader_id': video_uploader_id,
2080 'uploader_url': video_uploader_url,
2081 'upload_date': upload_date,
2082 'license': video_license,
2083 'creator': video_creator or artist,
2084 'title': video_title,
2085 'alt_title': video_alt_title or track,
2086 'thumbnail': video_thumbnail,
2087 'description': video_description,
2088 'categories': video_categories,
2089 'tags': video_tags,
2090 'subtitles': video_subtitles,
2091 'automatic_captions': automatic_captions,
2092 'duration': video_duration,
2093 'age_limit': 18 if age_gate else 0,
2094 'annotations': video_annotations,
2095 'chapters': chapters,
2096 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2097 'view_count': view_count,
2098 'like_count': like_count,
2099 'dislike_count': dislike_count,
2100 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2101 'formats': formats,
2102 'is_live': is_live,
2103 'start_time': start_time,
2104 'end_time': end_time,
2105 'series': series,
2106 'season_number': season_number,
2107 'episode_number': episode_number,
2108 'track': track,
2109 'artist': artist,
2110 }
2111
2112
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, including auto-generated mixes."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 čÆčŖžęœ€ę–°å–®ę›² (2/24ꛓꖰ)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    def _real_initialize(self):
        # Playlists may be private; log in when credentials are available.
        self._login()

    def _extract_mix(self, playlist_id):
        """Extract a YouTube mix (an auto-generated playlist).

        Mixes have no regular playlist page, so watch pages are fetched
        repeatedly and the video ids linked from the mix sidebar are
        collected until no new ones appear.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # Title markup differs between mix flavours; try variants from most
        # to least specific (uses the last fetched webpage).
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist from its playlist page.

        Returns a tuple (has_videos, playlist_result); has_videos is False
        when the URL did not actually serve a playlist with entries.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Handle --no-playlist for playlist URLs pointing at one video.

        Returns a tuple (video_id, result): result is a url_result when
        only the single video should be downloaded, None otherwise.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
                return video_id, None
        return None, None

    def _real_extract(self, url):
        """Dispatch between single-video, mix and regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/rg3/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
2408
2409
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for all videos of a YouTube channel."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the more specific playlists/live extractors when they match.
        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
                else super(YoutubeChannelIE, cls).suitable(url))

    def _build_template_url(self, url, channel_id):
        # Build the channel /videos listing URL (overridden in YoutubeUserIE).
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel's videos.

        Preferably resolves the channel to its uploads playlist ('UU' + id)
        and delegates to the playlist extractor; otherwise falls back to
        paging through the channel's /videos listing.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for at least one entry; an empty listing may carry an
            # alert explaining why (e.g. geo restriction).
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2499
2500
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match their URLs
        # as well, so defer to any more specific Youtube*IE first.
        other_yt_ies = (
            klass for (name, klass) in globals().items()
            if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Fill the template with the URL path kind ('user' or 'c'; plain
        # ytuser: URLs have no <user> group and default to 'user') and the
        # user/channel name taken from the URL.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2551
2552
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        display_id = match.group('id')
        channel_url = match.group('base_url')
        webpage = self._download_webpage(url, display_id, fatal=False)
        if not webpage:
            # Page could not be fetched: fall back to the channel/user page.
            return self.url_result(channel_url)
        og_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        # Only hand over to the video extractor when the page really embeds
        # a single video with a well-formed 11-character id.
        if og_type.startswith('video') and video_id and re.match(
                r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(channel_url)
2603
2604
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract all playlists from a user's or channel's /playlists tab."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
2633
2634
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Matches the video id (and, when the anchor carries one, the title
    # attribute) of /watch links on a search results page.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2637
2638
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Pages through the HTML search results (via the 'spf=navigate' JSON
        wrapper) until `n` videos were collected or no new page is available.
        Raises ExtractorError when the search yields no results at all.
        """

        videos = []
        limit = n

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            # The SPF response is a list whose second element holds the page body.
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop when the page yields nothing new or enough results were
            # collected; '>=' avoids downloading one extra page when exactly
            # `limit` results have already been gathered.
            if not new_videos or len(videos) >= limit:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # A page may overshoot the requested count; trim (guarded, since `n`
        # may be float('inf') and a [:inf] slice would raise).
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
2687
2688
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor that sorts results by upload date, newest first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Same search endpoint as the parent class, only with a sort parameter.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2694
2695
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The decoded search query doubles as display id and playlist title.
        raw_query = re.match(self._VALID_URL, url).group('query')
        search_query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, search_query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=search_query)
2716
2717
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is just a collection of playlists: delegate to the
        # playlists extractor on the show's /playlists page.
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
2735
2736
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are personalized, so an account is required up front.
        self._login()

    def _entries(self, page):
        """Yield video results from the feed, following 'load more' pages."""
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()  # set, not list: membership test per id must be O(1)
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [video_id for video_id in orderedSet(matches)
                       if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        # The URL itself is ignored beyond routing: the feed name is fixed
        # per subclass, so always fetch the canonical feed page.
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
2788
2789
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # When the URL points at a single video inside the WL list, prefer
        # extracting just that video; otherwise fall back to the whole list.
        video = self._check_download_just_video(url, 'WL')[1]
        if video:
            return video
        return self._extract_playlist('WL')[1]
2809
2810
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds an ordinary playlist id; extract it and
        # hand over to the playlist extractor.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
2821
2822
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Personalized /feed/recommended feed; extraction lives in the base class."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
2828
2829
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Personalized /feed/subscriptions feed; extraction lives in the base class."""
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
2835
2836
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Personalized /feed/history feed; extraction lives in the base class."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
2842
2843
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches watch/attribution_link URLs whose query string lost the video
    # id — the typical result of an unquoted '&' splitting the URL in the
    # user's shell.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing can be extracted from a truncated URL — explain the likely
        # shell-quoting mistake instead of failing cryptically.
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in '
            'quotes, like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
2891
2892
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Watch URLs whose video id is shorter than the required 11 characters.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A short id can never resolve to a video — refuse with a clear hint
        # that the URL was probably cut off.
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)