Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 import collections
   4 import errno
   5 import io
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import socket
  11 import string
  12 import struct
  13 import traceback
  14 import xml.etree.ElementTree
  15 import zlib
  16
  17 from .common import InfoExtractor, SearchInfoExtractor
  18 from .subtitles import SubtitlesInfoExtractor
  19 from ..utils import (
  20     compat_chr,
  21     compat_http_client,
  22     compat_parse_qs,
  23     compat_urllib_error,
  24     compat_urllib_parse,
  25     compat_urllib_request,
  26     compat_urlparse,
  27     compat_str,
  28
  29     clean_html,
  30     get_cachedir,
  31     get_element_by_id,
  32     ExtractorError,
  33     unescapeHTML,
  34     unified_strdate,
  35     orderedSet,
  36     write_json_file,
  37 )
  38
  39 class YoutubeBaseInfoExtractor(InfoExtractor):
  40     """Provide base functions for Youtube extractors"""
  41     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  42     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
  43     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
  44     _NETRC_MACHINE = 'youtube'
  45     # If True it will raise an error if no login info is provided
  46     _LOGIN_REQUIRED = False
  47
  48     def report_lang(self):
  49         """Report attempt to set language."""
  50         self.to_screen(u'Setting language')
  51
  52     def _set_language(self):
  53         request = compat_urllib_request.Request(self._LANG_URL)
  54         try:
  55             self.report_lang()
  56             compat_urllib_request.urlopen(request).read()
  57         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  58             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
  59             return False
  60         return True
  61
  62     def _login(self):
  63         (username, password) = self._get_login_info()
  64         # No authentication to be performed
  65         if username is None:
  66             if self._LOGIN_REQUIRED:
  67                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  68             return False
  69
  70         request = compat_urllib_request.Request(self._LOGIN_URL)
  71         try:
  72             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
  73         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  74             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
  75             return False
  76
  77         galx = None
  78         dsh = None
  79         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
  80         if match:
  81           galx = match.group(1)
  82         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
  83         if match:
  84           dsh = match.group(1)
  85
  86         # Log in
  87         login_form_strs = {
  88                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  89                 u'Email': username,
  90                 u'GALX': galx,
  91                 u'Passwd': password,
  92                 u'PersistentCookie': u'yes',
  93                 u'_utf8': u'霱',
  94                 u'bgresponse': u'js_disabled',
  95                 u'checkConnection': u'',
  96                 u'checkedDomains': u'youtube',
  97                 u'dnConn': u'',
  98                 u'dsh': dsh,
  99                 u'pstMsg': u'0',
 100                 u'rmShown': u'1',
 101                 u'secTok': u'',
 102                 u'signIn': u'Sign in',
 103                 u'timeStmp': u'',
 104                 u'service': u'youtube',
 105                 u'uilel': u'3',
 106                 u'hl': u'en_US',
 107         }
 108         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
 109         # chokes on unicode
 110         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
 111         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
 112         request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
 113         try:
 114             self.report_login()
 115             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
 116             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 117                 self._downloader.report_warning(u'unable to log in: bad username or password')
 118                 return False
 119         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 120             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
 121             return False
 122         return True
 123
 124     def _confirm_age(self):
 125         age_form = {
 126                 'next_url':     '/',
 127                 'action_confirm':   'Confirm',
 128                 }
 129         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
 130         try:
 131             self.report_age_confirmation()
 132             compat_urllib_request.urlopen(request).read().decode('utf-8')
 133         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 134             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
 135         return True
 136
 137     def _real_initialize(self):
 138         if self._downloader is None:
 139             return
 140         if not self._set_language():
 141             return
 142         if not self._login():
 143             return
 144         self._confirm_age()
 145
 146
 147 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 148     IE_DESC = u'YouTube.com'
 149     _VALID_URL = r"""^
 150                      (
 151                          (?:https?://)?                                       # http(s):// (optional)
 152                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
 153                             tube\.majestyc\.net/|
 154                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 155                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 156                          (?:                                                  # the various things that can precede the ID:
 157                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
 158                              |(?:                                             # or the v= param in all its forms
 159                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 160                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 161                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
 162                                  v=
 163                              )
 164                          ))
 165                          |youtu\.be/                                          # just youtu.be/xxxx
 166                          )
 167                      )?                                                       # all until now is optional -> you can pass the naked ID
 168                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 169                      (?(1).+)?                                                # if we found the ID, everything can follow
 170                      $"""
 171     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 172     # Listed in order of quality
 173     _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
 174                           # Apple HTTP Live Streaming
 175                           '96', '95', '94', '93', '92', '132', '151',
 176                           # 3D
 177                           '85', '84', '102', '83', '101', '82', '100',
 178                           # Dash video
 179                           '138', '137', '248', '136', '247', '135', '246',
 180                           '245', '244', '134', '243', '133', '242', '160',
 181                           # Dash audio
 182                           '141', '172', '140', '171', '139',
 183                           ]
 184     _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
 185                                       # Apple HTTP Live Streaming
 186                                       '96', '95', '94', '93', '92', '132', '151',
 187                                       # 3D
 188                                       '85', '102', '84', '101', '83', '100', '82',
 189                                       # Dash video
 190                                       '138', '248', '137', '247', '136', '246', '245',
 191                                       '244', '135', '243', '134', '242', '133', '160',
 192                                       # Dash audio
 193                                       '172', '141', '171', '140', '139',
 194                                       ]
 195     _video_formats_map = {
 196         'flv': ['35', '34', '6', '5'],
 197         '3gp': ['36', '17', '13'],
 198         'mp4': ['38', '37', '22', '18'],
 199         'webm': ['46', '45', '44', '43'],
 200     }
 201     _video_extensions = {
 202         '13': '3gp',
 203         '17': '3gp',
 204         '18': 'mp4',
 205         '22': 'mp4',
 206         '36': '3gp',
 207         '37': 'mp4',
 208         '38': 'mp4',
 209         '43': 'webm',
 210         '44': 'webm',
 211         '45': 'webm',
 212         '46': 'webm',
 213
 214         # 3d videos
 215         '82': 'mp4',
 216         '83': 'mp4',
 217         '84': 'mp4',
 218         '85': 'mp4',
 219         '100': 'webm',
 220         '101': 'webm',
 221         '102': 'webm',
 222
 223         # Apple HTTP Live Streaming
 224         '92': 'mp4',
 225         '93': 'mp4',
 226         '94': 'mp4',
 227         '95': 'mp4',
 228         '96': 'mp4',
 229         '132': 'mp4',
 230         '151': 'mp4',
 231
 232         # Dash mp4
 233         '133': 'mp4',
 234         '134': 'mp4',
 235         '135': 'mp4',
 236         '136': 'mp4',
 237         '137': 'mp4',
 238         '138': 'mp4',
 239         '139': 'mp4',
 240         '140': 'mp4',
 241         '141': 'mp4',
 242         '160': 'mp4',
 243
 244         # Dash webm
 245         '171': 'webm',
 246         '172': 'webm',
 247         '242': 'webm',
 248         '243': 'webm',
 249         '244': 'webm',
 250         '245': 'webm',
 251         '246': 'webm',
 252         '247': 'webm',
 253         '248': 'webm',
 254     }
 255     _video_dimensions = {
 256         '5': '240x400',
 257         '6': '???',
 258         '13': '???',
 259         '17': '144x176',
 260         '18': '360x640',
 261         '22': '720x1280',
 262         '34': '360x640',
 263         '35': '480x854',
 264         '36': '240x320',
 265         '37': '1080x1920',
 266         '38': '3072x4096',
 267         '43': '360x640',
 268         '44': '480x854',
 269         '45': '720x1280',
 270         '46': '1080x1920',
 271         '82': '360p',
 272         '83': '480p',
 273         '84': '720p',
 274         '85': '1080p',
 275         '92': '240p',
 276         '93': '360p',
 277         '94': '480p',
 278         '95': '720p',
 279         '96': '1080p',
 280         '100': '360p',
 281         '101': '480p',
 282         '102': '720p',
 283         '132': '240p',
 284         '151': '72p',
 285         '133': '240p',
 286         '134': '360p',
 287         '135': '480p',
 288         '136': '720p',
 289         '137': '1080p',
 290         '138': '>1080p',
 291         '139': '48k',
 292         '140': '128k',
 293         '141': '256k',
 294         '160': '192p',
 295         '171': '128k',
 296         '172': '256k',
 297         '242': '240p',
 298         '243': '360p',
 299         '244': '480p',
 300         '245': '480p',
 301         '246': '480p',
 302         '247': '720p',
 303         '248': '1080p',
 304     }
 305     _special_itags = {
 306         '82': '3D',
 307         '83': '3D',
 308         '84': '3D',
 309         '85': '3D',
 310         '100': '3D',
 311         '101': '3D',
 312         '102': '3D',
 313         '133': 'DASH Video',
 314         '134': 'DASH Video',
 315         '135': 'DASH Video',
 316         '136': 'DASH Video',
 317         '137': 'DASH Video',
 318         '138': 'DASH Video',
 319         '139': 'DASH Audio',
 320         '140': 'DASH Audio',
 321         '141': 'DASH Audio',
 322         '160': 'DASH Video',
 323         '171': 'DASH Audio',
 324         '172': 'DASH Audio',
 325         '242': 'DASH Video',
 326         '243': 'DASH Video',
 327         '244': 'DASH Video',
 328         '245': 'DASH Video',
 329         '246': 'DASH Video',
 330         '247': 'DASH Video',
 331         '248': 'DASH Video',
 332     }
 333
 334     IE_NAME = u'youtube'
 335     _TESTS = [
 336         {
 337             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
 338             u"file":  u"BaW_jenozKc.mp4",
 339             u"info_dict": {
 340                 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
 341                 u"uploader": u"Philipp Hagemeister",
 342                 u"uploader_id": u"phihag",
 343                 u"upload_date": u"20121002",
 344                 u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
 345             }
 346         },
 347         {
 348             u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
 349             u"file":  u"1ltcDfZMA3U.flv",
 350             u"note": u"Test VEVO video (#897)",
 351             u"info_dict": {
 352                 u"upload_date": u"20070518",
 353                 u"title": u"Maps - It Will Find You",
 354                 u"description": u"Music video by Maps performing It Will Find You.",
 355                 u"uploader": u"MuteUSA",
 356                 u"uploader_id": u"MuteUSA"
 357             }
 358         },
 359         {
 360             u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
 361             u"file":  u"UxxajLWwzqY.mp4",
 362             u"note": u"Test generic use_cipher_signature video (#897)",
 363             u"info_dict": {
 364                 u"upload_date": u"20120506",
 365                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
 366                 u"description": u"md5:5b292926389560516e384ac437c0ec07",
 367                 u"uploader": u"Icona Pop",
 368                 u"uploader_id": u"IconaPop"
 369             }
 370         },
 371         {
 372             u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
 373             u"file":  u"07FYdnEawAQ.mp4",
 374             u"note": u"Test VEVO video with age protection (#956)",
 375             u"info_dict": {
 376                 u"upload_date": u"20130703",
 377                 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
 378                 u"description": u"md5:64249768eec3bc4276236606ea996373",
 379                 u"uploader": u"justintimberlakeVEVO",
 380                 u"uploader_id": u"justintimberlakeVEVO"
 381             }
 382         },
 383     ]
 384
 385
 386     @classmethod
 387     def suitable(cls, url):
 388         """Receives a URL and returns True if suitable for this IE."""
 389         if YoutubePlaylistIE.suitable(url): return False
 390         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 391
 392     def __init__(self, *args, **kwargs):
 393         super(YoutubeIE, self).__init__(*args, **kwargs)
 394         self._player_cache = {}
 395
 396     def report_video_webpage_download(self, video_id):
 397         """Report attempt to download video webpage."""
 398         self.to_screen(u'%s: Downloading video webpage' % video_id)
 399
 400     def report_video_info_webpage_download(self, video_id):
 401         """Report attempt to download video info webpage."""
 402         self.to_screen(u'%s: Downloading video info webpage' % video_id)
 403
 404     def report_information_extraction(self, video_id):
 405         """Report attempt to extract video information."""
 406         self.to_screen(u'%s: Extracting video information' % video_id)
 407
 408     def report_unavailable_format(self, video_id, format):
 409         """Report extracted video URL."""
 410         self.to_screen(u'%s: Format %s not available' % (video_id, format))
 411
 412     def report_rtmp_download(self):
 413         """Indicate the download will use the RTMP protocol."""
 414         self.to_screen(u'RTMP download detected')
 415
 416     def _extract_signature_function(self, video_id, player_url, slen):
 417         id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
 418                         player_url)
 419         player_type = id_m.group('ext')
 420         player_id = id_m.group('id')
 421
 422         # Read from filesystem cache
 423         func_id = '%s_%s_%d' % (player_type, player_id, slen)
 424         assert os.path.basename(func_id) == func_id
 425         cache_dir = get_cachedir(self._downloader.params)
 426
 427         cache_enabled = cache_dir is not None
 428         if cache_enabled:
 429             cache_fn = os.path.join(os.path.expanduser(cache_dir),
 430                                     u'youtube-sigfuncs',
 431                                     func_id + '.json')
 432             try:
 433                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
 434                     cache_spec = json.load(cachef)
 435                 return lambda s: u''.join(s[i] for i in cache_spec)
 436             except IOError:
 437                 pass  # No cache available
 438
 439         if player_type == 'js':
 440             code = self._download_webpage(
 441                 player_url, video_id,
 442                 note=u'Downloading %s player %s' % (player_type, player_id),
 443                 errnote=u'Download of %s failed' % player_url)
 444             res = self._parse_sig_js(code)
 445         elif player_type == 'swf':
 446             urlh = self._request_webpage(
 447                 player_url, video_id,
 448                 note=u'Downloading %s player %s' % (player_type, player_id),
 449                 errnote=u'Download of %s failed' % player_url)
 450             code = urlh.read()
 451             res = self._parse_sig_swf(code)
 452         else:
 453             assert False, 'Invalid player type %r' % player_type
 454
 455         if cache_enabled:
 456             try:
 457                 test_string = u''.join(map(compat_chr, range(slen)))
 458                 cache_res = res(test_string)
 459                 cache_spec = [ord(c) for c in cache_res]
 460                 try:
 461                     os.makedirs(os.path.dirname(cache_fn))
 462                 except OSError as ose:
 463                     if ose.errno != errno.EEXIST:
 464                         raise
 465                 write_json_file(cache_spec, cache_fn)
 466             except Exception:
 467                 tb = traceback.format_exc()
 468                 self._downloader.report_warning(
 469                     u'Writing cache to %r failed: %s' % (cache_fn, tb))
 470
 471         return res
 472
 473     def _print_sig_code(self, func, slen):
 474         def gen_sig_code(idxs):
 475             def _genslice(start, end, step):
 476                 starts = u'' if start == 0 else str(start)
 477                 ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
 478                 steps = u'' if step == 1 else (u':%d' % step)
 479                 return u's[%s%s%s]' % (starts, ends, steps)
 480
 481             step = None
 482             start = '(Never used)'  # Quelch pyflakes warnings - start will be
 483                                     # set as soon as step is set
 484             for i, prev in zip(idxs[1:], idxs[:-1]):
 485                 if step is not None:
 486                     if i - prev == step:
 487                         continue
 488                     yield _genslice(start, prev, step)
 489                     step = None
 490                     continue
 491                 if i - prev in [-1, 1]:
 492                     step = i - prev
 493                     start = prev
 494                     continue
 495                 else:
 496                     yield u's[%d]' % prev
 497             if step is None:
 498                 yield u's[%d]' % i
 499             else:
 500                 yield _genslice(start, i, step)
 501
 502         test_string = u''.join(map(compat_chr, range(slen)))
 503         cache_res = func(test_string)
 504         cache_spec = [ord(c) for c in cache_res]
 505         expr_code = u' + '.join(gen_sig_code(cache_spec))
 506         code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
 507         self.to_screen(u'Extracted signature function:\n' + code)
 508
 509     def _parse_sig_js(self, jscode):
 510         funcname = self._search_regex(
 511             r'signature=([a-zA-Z]+)', jscode,
 512             u'Initial JS player signature function name')
 513
 514         functions = {}
 515
 516         def argidx(varname):
 517             return string.lowercase.index(varname)
 518
 519         def interpret_statement(stmt, local_vars, allow_recursion=20):
 520             if allow_recursion < 0:
 521                 raise ExtractorError(u'Recursion limit reached')
 522
 523             if stmt.startswith(u'var '):
 524                 stmt = stmt[len(u'var '):]
 525             ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
 526                              r'=(?P<expr>.*)$', stmt)
 527             if ass_m:
 528                 if ass_m.groupdict().get('index'):
 529                     def assign(val):
 530                         lvar = local_vars[ass_m.group('out')]
 531                         idx = interpret_expression(ass_m.group('index'),
 532                                                    local_vars, allow_recursion)
 533                         assert isinstance(idx, int)
 534                         lvar[idx] = val
 535                         return val
 536                     expr = ass_m.group('expr')
 537                 else:
 538                     def assign(val):
 539                         local_vars[ass_m.group('out')] = val
 540                         return val
 541                     expr = ass_m.group('expr')
 542             elif stmt.startswith(u'return '):
 543                 assign = lambda v: v
 544                 expr = stmt[len(u'return '):]
 545             else:
 546                 raise ExtractorError(
 547                     u'Cannot determine left side of statement in %r' % stmt)
 548
 549             v = interpret_expression(expr, local_vars, allow_recursion)
 550             return assign(v)
 551
 552         def interpret_expression(expr, local_vars, allow_recursion):
 553             if expr.isdigit():
 554                 return int(expr)
 555
 556             if expr.isalpha():
 557                 return local_vars[expr]
 558
 559             m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
 560             if m:
 561                 member = m.group('member')
 562                 val = local_vars[m.group('in')]
 563                 if member == 'split("")':
 564                     return list(val)
 565                 if member == 'join("")':
 566                     return u''.join(val)
 567                 if member == 'length':
 568                     return len(val)
 569                 if member == 'reverse()':
 570                     return val[::-1]
 571                 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
 572                 if slice_m:
 573                     idx = interpret_expression(
 574                         slice_m.group('idx'), local_vars, allow_recursion-1)
 575                     return val[idx:]
 576
 577             m = re.match(
 578                 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
 579             if m:
 580                 val = local_vars[m.group('in')]
 581                 idx = interpret_expression(m.group('idx'), local_vars,
 582                                            allow_recursion-1)
 583                 return val[idx]
 584
 585             m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
 586             if m:
 587                 a = interpret_expression(m.group('a'),
 588                                          local_vars, allow_recursion)
 589                 b = interpret_expression(m.group('b'),
 590                                          local_vars, allow_recursion)
 591                 return a % b
 592
 593             m = re.match(
 594                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
 595             if m:
 596                 fname = m.group('func')
 597                 if fname not in functions:
 598                     functions[fname] = extract_function(fname)
 599                 argvals = [int(v) if v.isdigit() else local_vars[v]
 600                            for v in m.group('args').split(',')]
 601                 return functions[fname](argvals)
 602             raise ExtractorError(u'Unsupported JS expression %r' % expr)
 603
 604         def extract_function(funcname):
 605             func_m = re.search(
 606                 r'function ' + re.escape(funcname) +
 607                 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
 608                 jscode)
 609             argnames = func_m.group('args').split(',')
 610
 611             def resf(args):
 612                 local_vars = dict(zip(argnames, args))
 613                 for stmt in func_m.group('code').split(';'):
 614                     res = interpret_statement(stmt, local_vars)
 615                 return res
 616             return resf
 617
 618         initial_function = extract_function(funcname)
 619         return lambda s: initial_function([s])
 620
 621     def _parse_sig_swf(self, file_contents):
 622         if file_contents[1:3] != b'WS':
 623             raise ExtractorError(
 624                 u'Not an SWF file; header is %r' % file_contents[:3])
 625         if file_contents[:1] == b'C':
 626             content = zlib.decompress(file_contents[8:])
 627         else:
 628             raise NotImplementedError(u'Unsupported compression format %r' %
 629                                       file_contents[:1])
 630
 631         def extract_tags(content):
 632             pos = 0
 633             while pos < len(content):
 634                 header16 = struct.unpack('<H', content[pos:pos+2])[0]
 635                 pos += 2
 636                 tag_code = header16 >> 6
 637                 tag_len = header16 & 0x3f
 638                 if tag_len == 0x3f:
 639                     tag_len = struct.unpack('<I', content[pos:pos+4])[0]
 640                     pos += 4
 641                 assert pos+tag_len <= len(content)
 642                 yield (tag_code, content[pos:pos+tag_len])
 643                 pos += tag_len
 644
 645         code_tag = next(tag
 646                         for tag_code, tag in extract_tags(content)
 647                         if tag_code == 82)
 648         p = code_tag.index(b'\0', 4) + 1
 649         code_reader = io.BytesIO(code_tag[p:])
 650
 651         # Parse ABC (AVM2 ByteCode)
 652         def read_int(reader=None):
 653             if reader is None:
 654                 reader = code_reader
 655             res = 0
 656             shift = 0
 657             for _ in range(5):
 658                 buf = reader.read(1)
 659                 assert len(buf) == 1
 660                 b = struct.unpack('<B', buf)[0]
 661                 res = res | ((b & 0x7f) << shift)
 662                 if b & 0x80 == 0:
 663                     break
 664                 shift += 7
 665             return res
 666
 667         def u30(reader=None):
 668             res = read_int(reader)
 669             assert res & 0xf0000000 == 0
 670             return res
 671         u32 = read_int
 672
 673         def s32(reader=None):
 674             v = read_int(reader)
 675             if v & 0x80000000 != 0:
 676                 v = - ((v ^ 0xffffffff) + 1)
 677             return v
 678
 679         def read_string(reader=None):
 680             if reader is None:
 681                 reader = code_reader
 682             slen = u30(reader)
 683             resb = reader.read(slen)
 684             assert len(resb) == slen
 685             return resb.decode('utf-8')
 686
 687         def read_bytes(count, reader=None):
 688             if reader is None:
 689                 reader = code_reader
 690             resb = reader.read(count)
 691             assert len(resb) == count
 692             return resb
 693
 694         def read_byte(reader=None):
 695             resb = read_bytes(1, reader=reader)
 696             res = struct.unpack('<B', resb)[0]
 697             return res
 698
 699         # minor_version + major_version
 700         read_bytes(2 + 2)
 701
 702         # Constant pool
 703         int_count = u30()
 704         for _c in range(1, int_count):
 705             s32()
 706         uint_count = u30()
 707         for _c in range(1, uint_count):
 708             u32()
 709         double_count = u30()
 710         read_bytes((double_count-1) * 8)
 711         string_count = u30()
 712         constant_strings = [u'']
 713         for _c in range(1, string_count):
 714             s = read_string()
 715             constant_strings.append(s)
 716         namespace_count = u30()
 717         for _c in range(1, namespace_count):
 718             read_bytes(1)  # kind
 719             u30()  # name
 720         ns_set_count = u30()
 721         for _c in range(1, ns_set_count):
 722             count = u30()
 723             for _c2 in range(count):
 724                 u30()
 725         multiname_count = u30()
 726         MULTINAME_SIZES = {
 727             0x07: 2,  # QName
 728             0x0d: 2,  # QNameA
 729             0x0f: 1,  # RTQName
 730             0x10: 1,  # RTQNameA
 731             0x11: 0,  # RTQNameL
 732             0x12: 0,  # RTQNameLA
 733             0x09: 2,  # Multiname
 734             0x0e: 2,  # MultinameA
 735             0x1b: 1,  # MultinameL
 736             0x1c: 1,  # MultinameLA
 737         }
 738         multinames = [u'']
 739         for _c in range(1, multiname_count):
 740             kind = u30()
 741             assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
 742             if kind == 0x07:
 743                 u30()  # namespace_idx
 744                 name_idx = u30()
 745                 multinames.append(constant_strings[name_idx])
 746             else:
 747                 multinames.append('[MULTINAME kind: %d]' % kind)
 748                 for _c2 in range(MULTINAME_SIZES[kind]):
 749                     u30()
 750
 751         # Methods
 752         method_count = u30()
 753         MethodInfo = collections.namedtuple(
 754             'MethodInfo',
 755             ['NEED_ARGUMENTS', 'NEED_REST'])
 756         method_infos = []
 757         for method_id in range(method_count):
 758             param_count = u30()
 759             u30()  # return type
 760             for _ in range(param_count):
 761                 u30()  # param type
 762             u30()  # name index (always 0 for youtube)
 763             flags = read_byte()
 764             if flags & 0x08 != 0:
 765                 # Options present
 766                 option_count = u30()
 767                 for c in range(option_count):
 768                     u30()  # val
 769                     read_bytes(1)  # kind
 770             if flags & 0x80 != 0:
 771                 # Param names present
 772                 for _ in range(param_count):
 773                     u30()  # param name
 774             mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
 775             method_infos.append(mi)
 776
 777         # Metadata
 778         metadata_count = u30()
 779         for _c in range(metadata_count):
 780             u30()  # name
 781             item_count = u30()
 782             for _c2 in range(item_count):
 783                 u30()  # key
 784                 u30()  # value
 785
 786         def parse_traits_info():
 787             trait_name_idx = u30()
 788             kind_full = read_byte()
 789             kind = kind_full & 0x0f
 790             attrs = kind_full >> 4
 791             methods = {}
 792             if kind in [0x00, 0x06]:  # Slot or Const
 793                 u30()  # Slot id
 794                 u30()  # type_name_idx
 795                 vindex = u30()
 796                 if vindex != 0:
 797                     read_byte()  # vkind
 798             elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
 799                 u30()  # disp_id
 800                 method_idx = u30()
 801                 methods[multinames[trait_name_idx]] = method_idx
 802             elif kind == 0x04:  # Class
 803                 u30()  # slot_id
 804                 u30()  # classi
 805             elif kind == 0x05:  # Function
 806                 u30()  # slot_id
 807                 function_idx = u30()
 808                 methods[function_idx] = multinames[trait_name_idx]
 809             else:
 810                 raise ExtractorError(u'Unsupported trait kind %d' % kind)
 811
 812             if attrs & 0x4 != 0:  # Metadata present
 813                 metadata_count = u30()
 814                 for _c3 in range(metadata_count):
 815                     u30()  # metadata index
 816
 817             return methods
 818
 819         # Classes
 820         TARGET_CLASSNAME = u'SignatureDecipher'
 821         searched_idx = multinames.index(TARGET_CLASSNAME)
 822         searched_class_id = None
 823         class_count = u30()
 824         for class_id in range(class_count):
 825             name_idx = u30()
 826             if name_idx == searched_idx:
 827                 # We found the class we're looking for!
 828                 searched_class_id = class_id
 829             u30()  # super_name idx
 830             flags = read_byte()
 831             if flags & 0x08 != 0:  # Protected namespace is present
 832                 u30()  # protected_ns_idx
 833             intrf_count = u30()
 834             for _c2 in range(intrf_count):
 835                 u30()
 836             u30()  # iinit
 837             trait_count = u30()
 838             for _c2 in range(trait_count):
 839                 parse_traits_info()
 840
 841         if searched_class_id is None:
 842             raise ExtractorError(u'Target class %r not found' %
 843                                  TARGET_CLASSNAME)
 844
 845         method_names = {}
 846         method_idxs = {}
 847         for class_id in range(class_count):
 848             u30()  # cinit
 849             trait_count = u30()
 850             for _c2 in range(trait_count):
 851                 trait_methods = parse_traits_info()
 852                 if class_id == searched_class_id:
 853                     method_names.update(trait_methods.items())
 854                     method_idxs.update(dict(
 855                         (idx, name)
 856                         for name, idx in trait_methods.items()))
 857
 858         # Scripts
 859         script_count = u30()
 860         for _c in range(script_count):
 861             u30()  # init
 862             trait_count = u30()
 863             for _c2 in range(trait_count):
 864                 parse_traits_info()
 865
 866         # Method bodies
 867         method_body_count = u30()
 868         Method = collections.namedtuple('Method', ['code', 'local_count'])
 869         methods = {}
 870         for _c in range(method_body_count):
 871             method_idx = u30()
 872             u30()  # max_stack
 873             local_count = u30()
 874             u30()  # init_scope_depth
 875             u30()  # max_scope_depth
 876             code_length = u30()
 877             code = read_bytes(code_length)
 878             if method_idx in method_idxs:
 879                 m = Method(code, local_count)
 880                 methods[method_idxs[method_idx]] = m
 881             exception_count = u30()
 882             for _c2 in range(exception_count):
 883                 u30()  # from
 884                 u30()  # to
 885                 u30()  # target
 886                 u30()  # exc_type
 887                 u30()  # var_name
 888             trait_count = u30()
 889             for _c2 in range(trait_count):
 890                 parse_traits_info()
 891
 892         assert p + code_reader.tell() == len(code_tag)
 893         assert len(methods) == len(method_idxs)
 894
 895         method_pyfunctions = {}
 896
 897         def extract_function(func_name):
 898             if func_name in method_pyfunctions:
 899                 return method_pyfunctions[func_name]
 900             if func_name not in methods:
 901                 raise ExtractorError(u'Cannot find function %r' % func_name)
 902             m = methods[func_name]
 903
 904             def resfunc(args):
 905                 registers = ['(this)'] + list(args) + [None] * m.local_count
 906                 stack = []
 907                 coder = io.BytesIO(m.code)
 908                 while True:
 909                     opcode = struct.unpack('!B', coder.read(1))[0]
 910                     if opcode == 36:  # pushbyte
 911                         v = struct.unpack('!B', coder.read(1))[0]
 912                         stack.append(v)
 913                     elif opcode == 44:  # pushstring
 914                         idx = u30(coder)
 915                         stack.append(constant_strings[idx])
 916                     elif opcode == 48:  # pushscope
 917                         # We don't implement the scope register, so we'll just
 918                         # ignore the popped value
 919                         stack.pop()
 920                     elif opcode == 70:  # callproperty
 921                         index = u30(coder)
 922                         mname = multinames[index]
 923                         arg_count = u30(coder)
 924                         args = list(reversed(
 925                             [stack.pop() for _ in range(arg_count)]))
 926                         obj = stack.pop()
 927                         if mname == u'split':
 928                             assert len(args) == 1
 929                             assert isinstance(args[0], compat_str)
 930                             assert isinstance(obj, compat_str)
 931                             if args[0] == u'':
 932                                 res = list(obj)
 933                             else:
 934                                 res = obj.split(args[0])
 935                             stack.append(res)
 936                         elif mname == u'slice':
 937                             assert len(args) == 1
 938                             assert isinstance(args[0], int)
 939                             assert isinstance(obj, list)
 940                             res = obj[args[0]:]
 941                             stack.append(res)
 942                         elif mname == u'join':
 943                             assert len(args) == 1
 944                             assert isinstance(args[0], compat_str)
 945                             assert isinstance(obj, list)
 946                             res = args[0].join(obj)
 947                             stack.append(res)
 948                         elif mname in method_pyfunctions:
 949                             stack.append(method_pyfunctions[mname](args))
 950                         else:
 951                             raise NotImplementedError(
 952                                 u'Unsupported property %r on %r'
 953                                 % (mname, obj))
 954                     elif opcode == 72:  # returnvalue
 955                         res = stack.pop()
 956                         return res
 957                     elif opcode == 79:  # callpropvoid
 958                         index = u30(coder)
 959                         mname = multinames[index]
 960                         arg_count = u30(coder)
 961                         args = list(reversed(
 962                             [stack.pop() for _ in range(arg_count)]))
 963                         obj = stack.pop()
 964                         if mname == u'reverse':
 965                             assert isinstance(obj, list)
 966                             obj.reverse()
 967                         else:
 968                             raise NotImplementedError(
 969                                 u'Unsupported (void) property %r on %r'
 970                                 % (mname, obj))
 971                     elif opcode == 93:  # findpropstrict
 972                         index = u30(coder)
 973                         mname = multinames[index]
 974                         res = extract_function(mname)
 975                         stack.append(res)
 976                     elif opcode == 97:  # setproperty
 977                         index = u30(coder)
 978                         value = stack.pop()
 979                         idx = stack.pop()
 980                         obj = stack.pop()
 981                         assert isinstance(obj, list)
 982                         assert isinstance(idx, int)
 983                         obj[idx] = value
 984                     elif opcode == 98:  # getlocal
 985                         index = u30(coder)
 986                         stack.append(registers[index])
 987                     elif opcode == 99:  # setlocal
 988                         index = u30(coder)
 989                         value = stack.pop()
 990                         registers[index] = value
 991                     elif opcode == 102:  # getproperty
 992                         index = u30(coder)
 993                         pname = multinames[index]
 994                         if pname == u'length':
 995                             obj = stack.pop()
 996                             assert isinstance(obj, list)
 997                             stack.append(len(obj))
 998                         else:  # Assume attribute access
 999                             idx = stack.pop()
1000                             assert isinstance(idx, int)
1001                             obj = stack.pop()
1002                             assert isinstance(obj, list)
1003                             stack.append(obj[idx])
1004                     elif opcode == 128:  # coerce
1005                         u30(coder)
1006                     elif opcode == 133:  # coerce_s
1007                         assert isinstance(stack[-1], (type(None), compat_str))
1008                     elif opcode == 164:  # modulo
1009                         value2 = stack.pop()
1010                         value1 = stack.pop()
1011                         res = value1 % value2
1012                         stack.append(res)
1013                     elif opcode == 208:  # getlocal_0
1014                         stack.append(registers[0])
1015                     elif opcode == 209:  # getlocal_1
1016                         stack.append(registers[1])
1017                     elif opcode == 210:  # getlocal_2
1018                         stack.append(registers[2])
1019                     elif opcode == 211:  # getlocal_3
1020                         stack.append(registers[3])
1021                     elif opcode == 214:  # setlocal_2
1022                         registers[2] = stack.pop()
1023                     elif opcode == 215:  # setlocal_3
1024                         registers[3] = stack.pop()
1025                     else:
1026                         raise NotImplementedError(
1027                             u'Unsupported opcode %d' % opcode)
1028
1029             method_pyfunctions[func_name] = resfunc
1030             return resfunc
1031
1032         initial_function = extract_function(u'decipher')
1033         return lambda s: initial_function([s])
1034
1035     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1036         """Turn the encrypted s field into a working signature"""
1037
1038         if player_url is not None:
1039             try:
1040                 player_id = (player_url, len(s))
1041                 if player_id not in self._player_cache:
1042                     func = self._extract_signature_function(
1043                         video_id, player_url, len(s)
1044                     )
1045                     self._player_cache[player_id] = func
1046                 func = self._player_cache[player_id]
1047                 if self._downloader.params.get('youtube_print_sig_code'):
1048                     self._print_sig_code(func, len(s))
1049                 return func(s)
1050             except Exception:
1051                 tb = traceback.format_exc()
1052                 self._downloader.report_warning(
1053                     u'Automatic signature extraction failed: ' + tb)
1054
1055             self._downloader.report_warning(
1056                 u'Warning: Falling back to static signature algorithm')
1057
1058         return self._static_decrypt_signature(
1059             s, video_id, player_url, age_gate)
1060
1061     def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
1062         if age_gate:
1063             # The videos with age protection use another player, so the
1064             # algorithms can be different.
1065             if len(s) == 86:
1066                 return s[2:63] + s[82] + s[64:82] + s[63]
1067
1068         if len(s) == 93:
1069             return s[86:29:-1] + s[88] + s[28:5:-1]
1070         elif len(s) == 92:
1071             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1072         elif len(s) == 91:
1073             return s[84:27:-1] + s[86] + s[26:5:-1]
1074         elif len(s) == 90:
1075             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1076         elif len(s) == 89:
1077             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1078         elif len(s) == 88:
1079             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1080         elif len(s) == 87:
1081             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1082         elif len(s) == 86:
1083             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
1084         elif len(s) == 85:
1085             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1086         elif len(s) == 84:
1087             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
1088         elif len(s) == 83:
1089             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
1090         elif len(s) == 82:
1091             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
1092         elif len(s) == 81:
1093             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1094         elif len(s) == 80:
1095             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1096         elif len(s) == 79:
1097             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1098
1099         else:
1100             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1101
1102     def _get_available_subtitles(self, video_id):
1103         try:
1104             sub_list = self._download_webpage(
1105                 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1106                 video_id, note=False)
1107         except ExtractorError as err:
1108             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
1109             return {}
1110         lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
1111
1112         sub_lang_list = {}
1113         for l in lang_list:
1114             lang = l[1]
1115             params = compat_urllib_parse.urlencode({
1116                 'lang': lang,
1117                 'v': video_id,
1118                 'fmt': self._downloader.params.get('subtitlesformat'),
1119                 'name': l[0],
1120             })
1121             url = u'http://www.youtube.com/api/timedtext?' + params
1122             sub_lang_list[lang] = url
1123         if not sub_lang_list:
1124             self._downloader.report_warning(u'video doesn\'t have subtitles')
1125             return {}
1126         return sub_lang_list
1127
1128     def _get_available_automatic_caption(self, video_id, webpage):
1129         """We need the webpage for getting the captions url, pass it as an
1130            argument to speed up the process."""
1131         sub_format = self._downloader.params.get('subtitlesformat')
1132         self.to_screen(u'%s: Looking for automatic captions' % video_id)
1133         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1134         err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1135         if mobj is None:
1136             self._downloader.report_warning(err_msg)
1137             return {}
1138         player_config = json.loads(mobj.group(1))
1139         try:
1140             args = player_config[u'args']
1141             caption_url = args[u'ttsurl']
1142             timestamp = args[u'timestamp']
1143             # We get the available subtitles
1144             list_params = compat_urllib_parse.urlencode({
1145                 'type': 'list',
1146                 'tlangs': 1,
1147                 'asrs': 1,
1148             })
1149             list_url = caption_url + '&' + list_params
1150             list_page = self._download_webpage(list_url, video_id)
1151             caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
1152             original_lang_node = caption_list.find('track')
1153             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
1154                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1155                 return {}
1156             original_lang = original_lang_node.attrib['lang_code']
1157
1158             sub_lang_list = {}
1159             for lang_node in caption_list.findall('target'):
1160                 sub_lang = lang_node.attrib['lang_code']
1161                 params = compat_urllib_parse.urlencode({
1162                     'lang': original_lang,
1163                     'tlang': sub_lang,
1164                     'fmt': sub_format,
1165                     'ts': timestamp,
1166                     'kind': 'asr',
1167                 })
1168                 sub_lang_list[sub_lang] = caption_url + '&' + params
1169             return sub_lang_list
1170         # An extractor error can be raise by the download process if there are
1171         # no automatic captions but there are subtitles
1172         except (KeyError, ExtractorError):
1173             self._downloader.report_warning(err_msg)
1174             return {}
1175
1176     def _print_formats(self, formats):
1177         print('Available formats:')
1178         for x in formats:
1179             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1180                                         self._video_dimensions.get(x, '???'),
1181                                         ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
1182
1183     def _extract_id(self, url):
1184         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1185         if mobj is None:
1186             raise ExtractorError(u'Invalid URL: %s' % url)
1187         video_id = mobj.group(2)
1188         return video_id
1189
1190     def _get_video_url_list(self, url_map):
1191         """
1192         Transform a dictionary in the format {itag:url} to a list of (itag, url)
1193         with the requested formats.
1194         """
1195         req_format = self._downloader.params.get('format', None)
1196         format_limit = self._downloader.params.get('format_limit', None)
1197         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1198         if format_limit is not None and format_limit in available_formats:
1199             format_list = available_formats[available_formats.index(format_limit):]
1200         else:
1201             format_list = available_formats
1202         existing_formats = [x for x in format_list if x in url_map]
1203         if len(existing_formats) == 0:
1204             raise ExtractorError(u'no known formats available for video')
1205         if self._downloader.params.get('listformats', None):
1206             self._print_formats(existing_formats)
1207             return
1208         if req_format is None or req_format == 'best':
1209             video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1210         elif req_format == 'worst':
1211             video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1212         elif req_format in ('-1', 'all'):
1213             video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1214         else:
1215             # Specific formats. We pick the first in a slash-delimeted sequence.
1216             # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1217             # available in the specified format. For example,
1218             # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1219             # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1220             # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1221             req_formats = req_format.split('/')
1222             video_url_list = None
1223             for rf in req_formats:
1224                 if rf in url_map:
1225                     video_url_list = [(rf, url_map[rf])]
1226                     break
1227                 if rf in self._video_formats_map:
1228                     for srf in self._video_formats_map[rf]:
1229                         if srf in url_map:
1230                             video_url_list = [(srf, url_map[srf])]
1231                             break
1232                     else:
1233                         continue
1234                     break
1235             if video_url_list is None:
1236                 raise ExtractorError(u'requested format not available')
1237         return video_url_list
1238
1239     def _extract_from_m3u8(self, manifest_url, video_id):
1240         url_map = {}
1241         def _get_urls(_manifest):
1242             lines = _manifest.split('\n')
1243             urls = filter(lambda l: l and not l.startswith('#'),
1244                             lines)
1245             return urls
1246         manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1247         formats_urls = _get_urls(manifest)
1248         for format_url in formats_urls:
1249             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1250             url_map[itag] = format_url
1251         return url_map
1252
1253     def _extract_annotations(self, video_id):
1254         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1255         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
1256
1257     def _real_extract(self, url):
1258         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1259         mobj = re.search(self._NEXT_URL_RE, url)
1260         if mobj:
1261             url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1262         video_id = self._extract_id(url)
1263
1264         # Get video webpage
1265         self.report_video_webpage_download(video_id)
1266         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1267         request = compat_urllib_request.Request(url)
1268         try:
1269             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1270         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1271             raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1272
1273         video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1274
1275         # Attempt to extract SWF player URL
1276         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1277         if mobj is not None:
1278             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1279         else:
1280             player_url = None
1281
1282         # Get video info
1283         self.report_video_info_webpage_download(video_id)
1284         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1285             self.report_age_confirmation()
1286             age_gate = True
1287             # We simulate the access to the video from www.youtube.com/v/{video_id}
1288             # this can be viewed without login into Youtube
1289             data = compat_urllib_parse.urlencode({'video_id': video_id,
1290                                                   'el': 'embedded',
1291                                                   'gl': 'US',
1292                                                   'hl': 'en',
1293                                                   'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1294                                                   'asv': 3,
1295                                                   'sts':'1588',
1296                                                   })
1297             video_info_url = 'https://www.youtube.com/get_video_info?' + data
1298             video_info_webpage = self._download_webpage(video_info_url, video_id,
1299                                     note=False,
1300                                     errnote='unable to download video info webpage')
1301             video_info = compat_parse_qs(video_info_webpage)
1302         else:
1303             age_gate = False
1304             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1305                 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1306                         % (video_id, el_type))
1307                 video_info_webpage = self._download_webpage(video_info_url, video_id,
1308                                         note=False,
1309                                         errnote='unable to download video info webpage')
1310                 video_info = compat_parse_qs(video_info_webpage)
1311                 if 'token' in video_info:
1312                     break
1313         if 'token' not in video_info:
1314             if 'reason' in video_info:
1315                 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1316             else:
1317                 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1318
1319         # Check for "rental" videos
1320         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1321             raise ExtractorError(u'"rental" videos not supported')
1322
1323         # Start extracting information
1324         self.report_information_extraction(video_id)
1325
1326         # uploader
1327         if 'author' not in video_info:
1328             raise ExtractorError(u'Unable to extract uploader name')
1329         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1330
1331         # uploader_id
1332         video_uploader_id = None
1333         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1334         if mobj is not None:
1335             video_uploader_id = mobj.group(1)
1336         else:
1337             self._downloader.report_warning(u'unable to extract uploader nickname')
1338
1339         # title
1340         if 'title' in video_info:
1341             video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1342         else:
1343             self._downloader.report_warning(u'Unable to extract video title')
1344             video_title = u'_'
1345
1346         # thumbnail image
1347         # We try first to get a high quality image:
1348         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1349                             video_webpage, re.DOTALL)
1350         if m_thumb is not None:
1351             video_thumbnail = m_thumb.group(1)
1352         elif 'thumbnail_url' not in video_info:
1353             self._downloader.report_warning(u'unable to extract video thumbnail')
1354             video_thumbnail = None
1355         else:   # don't panic if we can't find it
1356             video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1357
1358         # upload date
1359         upload_date = None
1360         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1361         if mobj is not None:
1362             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1363             upload_date = unified_strdate(upload_date)
1364
1365         # description
1366         video_description = get_element_by_id("eow-description", video_webpage)
1367         if video_description:
1368             video_description = clean_html(video_description)
1369         else:
1370             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1371             if fd_mobj:
1372                 video_description = unescapeHTML(fd_mobj.group(1))
1373             else:
1374                 video_description = u''
1375
1376         # subtitles
1377         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1378
1379         if self._downloader.params.get('listsubtitles', False):
1380             self._list_available_subtitles(video_id, video_webpage)
1381             return
1382
1383         if 'length_seconds' not in video_info:
1384             self._downloader.report_warning(u'unable to extract video duration')
1385             video_duration = ''
1386         else:
1387             video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1388
1389         # annotations
1390         video_annotations = None
1391         if self._downloader.params.get('writeannotations', False):
1392                 video_annotations = self._extract_annotations(video_id)
1393
1394         # Decide which formats to download
1395
1396         try:
1397             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1398             if not mobj:
1399                 raise ValueError('Could not find vevo ID')
1400             info = json.loads(mobj.group(1))
1401             args = info['args']
1402             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1403             # this signatures are encrypted
1404             if 'url_encoded_fmt_stream_map' not in args:
1405                 raise ValueError(u'No stream_map present')  # caught below
1406             m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1407             if m_s is not None:
1408                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1409                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1410             m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1411             if m_s is not None:
1412                 if 'url_encoded_fmt_stream_map' in video_info:
1413                     video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1414                 else:
1415                     video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1416             elif 'adaptive_fmts' in video_info:
1417                 if 'url_encoded_fmt_stream_map' in video_info:
1418                     video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1419                 else:
1420                     video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1421         except ValueError:
1422             pass
1423
1424         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1425             self.report_rtmp_download()
1426             video_url_list = [(None, video_info['conn'][0])]
1427         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1428             if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1429                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1430             url_map = {}
1431             for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1432                 url_data = compat_parse_qs(url_data_str)
1433                 if 'itag' in url_data and 'url' in url_data:
1434                     url = url_data['url'][0]
1435                     if 'sig' in url_data:
1436                         url += '&signature=' + url_data['sig'][0]
1437                     elif 's' in url_data:
1438                         encrypted_sig = url_data['s'][0]
1439                         if self._downloader.params.get('verbose'):
1440                             if age_gate:
1441                                 if player_url is None:
1442                                     player_version = 'unknown'
1443                                 else:
1444                                     player_version = self._search_regex(
1445                                         r'-(.+)\.swf$', player_url,
1446                                         u'flash player', fatal=False)
1447                                 player_desc = 'flash player %s' % player_version
1448                             else:
1449                                 player_version = self._search_regex(
1450                                     r'html5player-(.+?)\.js', video_webpage,
1451                                     'html5 player', fatal=False)
1452                                 player_desc = u'html5 player %s' % player_version
1453
1454                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1455                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1456                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1457
1458                         if not age_gate:
1459                             jsplayer_url_json = self._search_regex(
1460                                 r'"assets":.+?"js":\s*("[^"]+")',
1461                                 video_webpage, u'JS player URL')
1462                             player_url = json.loads(jsplayer_url_json)
1463
1464                         signature = self._decrypt_signature(
1465                             encrypted_sig, video_id, player_url, age_gate)
1466                         url += '&signature=' + signature
1467                     if 'ratebypass' not in url:
1468                         url += '&ratebypass=yes'
1469                     url_map[url_data['itag'][0]] = url
1470             video_url_list = self._get_video_url_list(url_map)
1471             if not video_url_list:
1472                 return
1473         elif video_info.get('hlsvp'):
1474             manifest_url = video_info['hlsvp'][0]
1475             url_map = self._extract_from_m3u8(manifest_url, video_id)
1476             video_url_list = self._get_video_url_list(url_map)
1477             if not video_url_list:
1478                 return
1479
1480         else:
1481             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1482
1483         results = []
1484         for format_param, video_real_url in video_url_list:
1485             # Extension
1486             video_extension = self._video_extensions.get(format_param, 'flv')
1487
1488             video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1489                                               self._video_dimensions.get(format_param, '???'),
1490                                               ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1491
1492             results.append({
1493                 'id':       video_id,
1494                 'url':      video_real_url,
1495                 'uploader': video_uploader,
1496                 'uploader_id': video_uploader_id,
1497                 'upload_date':  upload_date,
1498                 'title':    video_title,
1499                 'ext':      video_extension,
1500                 'format':   video_format,
1501                 'thumbnail':    video_thumbnail,
1502                 'description':  video_description,
1503                 'player_url':   player_url,
1504                 'subtitles':    video_subtitles,
1505                 'duration':     video_duration,
1506                 'age_limit':    18 if age_gate else 0,
1507                 'annotations':  video_annotations
1508             })
1509         return results
1510
class YoutubePlaylistIE(InfoExtractor):
    """Information extractor for YouTube playlists.

    The playlist id is taken from the URL (or from a bare prefixed id) and
    the entries are then read page by page from the JSON feed addressed by
    _TEMPLATE_URL.
    """
    IE_DESC = u'YouTube.com playlists'
    # Verbose pattern: it has to be matched with the re.VERBOSE flag.
    # Group 1 is the id found in a youtube.com URL, group 2 a bare id that
    # carries one of the PL/EC/UU/FL prefixes.
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        .*
                     |
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    # Number of entries requested per feed page
    _MAX_RESULTS = 50
    IE_NAME = u'youtube:playlist'

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written as a verbose pattern, hence re.VERBOSE
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Collect the videos of the playlist referenced by url.

        Returns a one-element list holding the playlist result, or a single
        url result when the URL also names a video and the 'noplaylist'
        downloader parameter is set.

        Raises ExtractorError if the URL does not match, if the feed is not
        valid JSON, or if the decoded response has no 'feed' key.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
            else:
                # playlist_id already carries its own prefix (PL, EC, UU or
                # FL) whenever the URL had one, so it is printed verbatim.
                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        # Download playlist videos from API
        videos = []

        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            # Stop paging once the start index reaches 1000
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
                break
            # A separate name keeps the url argument intact
            page_url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(page_url, playlist_id, u'Downloading page #%s' % page_num)

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
                break

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                # Entries without a video id are left out
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))

        # Order by the position reported in the feed, then keep only the URLs
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
1588
1589
class YoutubeChannelIE(InfoExtractor):
    """Information extractor for the video listing of a YouTube channel."""
    IE_NAME = u'youtube:channel'
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # First listing page (HTML)
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Follow-up pages (JSON)
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    # Marker whose presence tells that another page can be requested
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'

    def extract_videos_from_page(self, page):
        """Return the video ids linked from page, without duplicates and in
        order of first appearance."""
        unique_ids = []
        for video_id in re.findall(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if video_id in unique_ids:
                continue
            unique_ids.append(video_id)
        return unique_ids

    def _real_extract(self, url):
        """Gather the channel's video ids and return them wrapped in a
        one-element list holding the playlist result.

        Raises ExtractorError if url does not match _VALID_URL.
        """
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        channel_id = match.group(1)

        # The first page is plain HTML
        first_page_no = 1
        listing = self._download_webpage(
            self._TEMPLATE_URL % (channel_id, first_page_no),
            channel_id,
            u'Downloading page #%s' % first_page_no)
        video_ids = list(self.extract_videos_from_page(listing))

        # Further pages come from the JSON endpoint, for as long as the
        # "load more" marker keeps showing up
        if self._MORE_PAGES_INDICATOR in listing:
            ajax_page_no = 0
            while True:
                ajax_page_no += 1
                raw_reply = self._download_webpage(
                    self._MORE_PAGES_URL % (ajax_page_no, channel_id),
                    channel_id,
                    u'Downloading page #%s' % ajax_page_no)
                reply = json.loads(raw_reply)

                video_ids += self.extract_videos_from_page(reply['content_html'])

                if self._MORE_PAGES_INDICATOR not in reply['load_more_widget_html']:
                    break

        self._downloader.to_screen(
            u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = []
        for video_id in video_ids:
            watch_url = 'http://www.youtube.com/watch?v=%s' % video_id
            url_entries.append(self.url_result(watch_url, 'Youtube'))
        return [self.playlist_result(url_entries, channel_id)]
1644
1645
class YoutubeUserIE(InfoExtractor):
    """Information extractor for the uploads of a YouTube user.

    Accepts youtube.com user URLs as well as the "ytuser:" keyword form and
    reads the uploads page by page from the JSON feed at _GDATA_URL.
    """
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # Number of ids requested per feed page
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        """Return True only if url matches this IE and no other IE of this
        module claims it."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and it would match.
        other_ies = (klass for (name, klass) in globals().items()
                     if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect all uploads of the user named in url.

        Returns a one-element list holding the playlist result, titled with
        the username.

        Raises ExtractorError if the URL does not match, if a feed page is
        not valid JSON, or if the decoded response has no 'feed' key.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []

        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            # The page asks for _GDATA_PAGE_SIZE ids beginning at
            # start_index, so the last one is start_index + size - 1.
            end_index = start_index + self._GDATA_PAGE_SIZE - 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, end_index))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            # Report a reply without a feed as an extractor error instead of
            # letting the lookup below fail with a KeyError
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._GDATA_PAGE_SIZE
                break

            # Extract video identifiers: the id is the last path component
            # of each entry's id field
            ids_in_page = [entry['id']['$t'].split('/')[-1]
                           for entry in response['feed']['entry']]
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
                break

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
1710
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor that queries the JSON-C video feed at _API_URL."""
    IE_NAME = u'youtube:search'
    IE_DESC = u'YouTube.com searches'
    _SEARCH_KEY = 'ytsearch'
    _MAX_RESULTS = 1000
    # The feed is requested in pages of 50 results (see max-results)
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        message = u'[youtube] query "%s": Downloading page %s' % (query, pagenum)
        self._downloader.to_screen(message)

    def _get_n_results(self, query, n):
        """Return a playlist result holding at most n results for query.

        Raises ExtractorError if a page cannot be downloaded or if a
        response carries no 'items'.
        """
        collected_ids = []
        wanted = n
        page_index = 0

        while page_index * 50 < wanted:
            self.report_download_page(query, page_index + 1)
            api_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query), page_index * 50 + 1)
            api_request = compat_urllib_request.Request(api_url)
            try:
                raw = compat_urllib_request.urlopen(api_request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            payload = json.loads(raw)['data']

            if 'items' not in payload:
                raise ExtractorError(u'[youtube] No video results')

            collected_ids.extend([item['id'] for item in payload['items']])

            # Never ask for more than the API says exists
            wanted = min(n, payload['totalItems'])
            page_index += 1

        # The last page may have brought more ids than requested
        if len(collected_ids) > n:
            del collected_ids[n:]

        entries = []
        for video_id in collected_ids:
            entries.append(
                self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube'))
        return self.playlist_result(entries, query)
1752
1753
class YoutubeShowIE(InfoExtractor):
    """Information extractor for show pages, which link one playlist per
    season."""
    IE_NAME = u'youtube:show'
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        """Return one url result (handled by 'YoutubePlaylist') for every
        playlist link found on the show page."""
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')

        # Each season of the show is published as its own playlist
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))

        season_results = []
        for path in season_paths:
            season_results.append(
                self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist'))
        return season_results
1767
1768
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common implementation for the extractors that read their videos from
    http://www.youtube.com/feed_ajax
    A subclass has to provide the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Amount added to the 'paging' value from one request to the next
    _PAGING_STEP = 30
    # When True, action_load_personal_feed is requested instead of
    # action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        """URL of the feed with a single %s left open for the paging value."""
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        else:
            action = 'action_load_system_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Read the feed page by page and return a playlist result with one
        url result per video id found."""
        feed_entries = []
        # Counted by hand and multiplied below: the step argument of
        # itertools.count is available only in 2.7 or higher
        page_no = 0
        while True:
            offset = page_no * self._PAGING_STEP
            raw = self._download_webpage(self._FEED_TEMPLATE % offset,
                                         u'%s feed' % self._FEED_NAME,
                                         u'Downloading page %s' % page_no)
            info = json.loads(raw)
            feed_html = info['feed_html']
            video_ids = orderedSet(re.findall(r'"/watch\?v=(.*?)["&]', feed_html))
            for video_id in video_ids:
                feed_entries.append(self.url_result(video_id, 'Youtube'))
            # A null 'paging' value ends the loop
            if info['paging'] is None:
                break
            page_no += 1
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
1810
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    # Either the feed URL or the ":ytsubs" / ":ytsubscriptions" keyword
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _FEED_NAME = 'subscriptions'
1816
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    # Either the feed URL or the ":ytrec" / ":ytrecommended" keyword
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _FEED_NAME = 'recommended'
1822
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'watch_later' feed."""
    # Either the feed URL or the ":ytwatchlater" keyword
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _FEED_NAME = 'watch_later'
    # This feed is requested as a personal feed, with a paging step of 100
    _PERSONAL_FEED = True
    _PAGING_STEP = 100
1830
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Information extractor that resolves the my_favorites page to the
    playlist it links to."""
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    IE_NAME = u'youtube:favorites'
    # Either the my_favorites URL or a ":ytfav..." keyword
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Return a url result (handled by 'YoutubePlaylist') for the first
        list id found on the favourites page."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        return self.url_result(
            self._search_regex(
                r'list=(.+?)["&]', favourites_page, u'favourites playlist id'),
            'YoutubePlaylist')
1841
1842
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch URLs that stop right after the feature parameter and
    answers them with a hint about quoting instead of extracting anything."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'

    def _real_extract(self, url):
        """Always raise an ExtractorError (marked as expected) carrying the
        quoting hint."""
        hint = u''.join((
            u'Did you forget to quote the URL? Remember that & is a meta ',
            u'character in most shells, so you want to put the URL in quotes, ',
            u'like  youtube-dl ',
            u"'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc'",
            u' (or simply  youtube-dl BaW_jenozKc  ).',
        ))
        raise ExtractorError(hint, expected=True)