11 from .common 
import InfoExtractor
, SearchInfoExtractor
 
  12 from .subtitles 
import SubtitlesInfoExtractor
 
  13 from ..jsinterp 
import JSInterpreter
 
  14 from ..swfinterp 
import SWFInterpreter
 
  19     compat_urllib_request
, 
  26     get_element_by_attribute
, 
  37 class YoutubeBaseInfoExtractor(InfoExtractor
): 
  38     """Provide base functions for Youtube extractors""" 
  39     _LOGIN_URL 
= 'https://accounts.google.com/ServiceLogin' 
  40     _LANG_URL 
= r
'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
  41     _AGE_URL 
= 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
  42     _NETRC_MACHINE 
= 'youtube' 
  43     # If True it will raise an error if no login info is provided 
  44     _LOGIN_REQUIRED 
= False 
  46     def _set_language(self
): 
  47         return bool(self
._download
_webpage
( 
  49             note
=u
'Setting language', errnote
='unable to set language', 
  53         (username
, password
) = self
._get
_login
_info
() 
  54         # No authentication to be performed 
  56             if self
._LOGIN
_REQUIRED
: 
  57                 raise ExtractorError(u
'No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True) 
  60         login_page 
= self
._download
_webpage
( 
  61             self
._LOGIN
_URL
, None, 
  62             note
=u
'Downloading login page', 
  63             errnote
=u
'unable to fetch login page', fatal
=False) 
  64         if login_page 
is False: 
  67         galx 
= self
._search
_regex
(r
'(?s)<input.+?name="GALX".+?value="(.+?)"', 
  68                                   login_page
, u
'Login GALX parameter') 
  72                 u
'continue': u
'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', 
  76                 u
'PersistentCookie': u
'yes', 
  78                 u
'bgresponse': u
'js_disabled', 
  79                 u
'checkConnection': u
'', 
  80                 u
'checkedDomains': u
'youtube', 
  85                 u
'signIn': u
'Sign in', 
  87                 u
'service': u
'youtube', 
  91         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode 
  93         login_form 
= dict((k
.encode('utf-8'), v
.encode('utf-8')) for k
,v 
in login_form_strs
.items()) 
  94         login_data 
= compat_urllib_parse
.urlencode(login_form
).encode('ascii') 
  96         req 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, login_data
) 
  97         login_results 
= self
._download
_webpage
( 
  99             note
=u
'Logging in', errnote
=u
'unable to log in', fatal
=False) 
 100         if login_results 
is False: 
 102         if re
.search(r
'(?i)<form[^>]* id="gaia_loginform"', login_results
) is not None: 
 103             self
._downloader
.report_warning(u
'unable to log in: bad username or password') 
 107     def _confirm_age(self
): 
 110             'action_confirm': 'Confirm', 
 112         req 
= compat_urllib_request
.Request(self
._AGE
_URL
, 
 113             compat_urllib_parse
.urlencode(age_form
).encode('ascii')) 
 115         self
._download
_webpage
( 
 117             note
=u
'Confirming age', errnote
=u
'Unable to confirm age') 
 120     def _real_initialize(self
): 
 121         if self
._downloader 
is None: 
 123         if not self
._set
_language
(): 
 125         if not self
._login
(): 
 130 class YoutubeIE(YoutubeBaseInfoExtractor
, SubtitlesInfoExtractor
): 
 131     IE_DESC 
= u
'YouTube.com' 
 132     _VALID_URL 
= r
"""(?x)^ 
 134                          (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional) 
 135                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| 
 136                             (?:www\.)?deturl\.com/www\.youtube\.com/| 
 137                             (?:www\.)?pwnyoutube\.com/| 
 138                             (?:www\.)?yourepeat\.com/| 
 139                             tube\.majestyc\.net/| 
 140                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains 
 141                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls 
 142                          (?:                                                  # the various things that can precede the ID: 
 143                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ 
 144                              |(?:                                             # or the v= param in all its forms 
 145                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx) 
 146                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! 
 147                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) 
 151                          |youtu\.be/                                          # just youtu.be/xxxx 
 152                          |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= 
 154                      )?                                                       # all until now is optional -> you can pass the naked ID 
 155                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID 
 156                      (?(1).+)?                                                # if we found the ID, everything can follow 
 158     _NEXT_URL_RE 
= r
'[\?&]next_url=([^&]+)' 
 160         '5': {'ext': 'flv', 'width': 400, 'height': 240}, 
 161         '6': {'ext': 'flv', 'width': 450, 'height': 270}, 
 162         '13': {'ext': '3gp'}, 
 163         '17': {'ext': '3gp', 'width': 176, 'height': 144}, 
 164         '18': {'ext': 'mp4', 'width': 640, 'height': 360}, 
 165         '22': {'ext': 'mp4', 'width': 1280, 'height': 720}, 
 166         '34': {'ext': 'flv', 'width': 640, 'height': 360}, 
 167         '35': {'ext': 'flv', 'width': 854, 'height': 480}, 
 168         '36': {'ext': '3gp', 'width': 320, 'height': 240}, 
 169         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080}, 
 170         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072}, 
 171         '43': {'ext': 'webm', 'width': 640, 'height': 360}, 
 172         '44': {'ext': 'webm', 'width': 854, 'height': 480}, 
 173         '45': {'ext': 'webm', 'width': 1280, 'height': 720}, 
 174         '46': {'ext': 'webm', 'width': 1920, 'height': 1080}, 
 178         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20}, 
 179         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20}, 
 180         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20}, 
 181         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20}, 
 182         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20}, 
 183         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20}, 
 184         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20}, 
 186         # Apple HTTP Live Streaming 
 187         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10}, 
 188         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10}, 
 189         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10}, 
 190         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10}, 
 191         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10}, 
 192         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10}, 
 193         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10}, 
 196         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 197         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 198         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 199         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 200         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 201         '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 202         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 203         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 206         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, 
 207         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50}, 
 208         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50}, 
 211         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, 
 212         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, 
 213         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, 
 214         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, 
 215         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, 
 216         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, 
 217         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 218         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 219         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 220         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 221         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 222         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 223         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 224         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 225         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, 
 228         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50}, 
 229         '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, 
 232         '_rtmp': {'protocol': 'rtmp'}, 
 238             u
"url":  u
"http://www.youtube.com/watch?v=BaW_jenozKc", 
 239             u
"file":  u
"BaW_jenozKc.mp4", 
 241                 u
"title": u
"youtube-dl test video \"'/\\ä↭𝕐", 
 242                 u
"uploader": u
"Philipp Hagemeister", 
 243                 u
"uploader_id": u
"phihag", 
 244                 u
"upload_date": u
"20121002", 
 245                 u
"description": u
"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", 
 246                 u
"categories": [u
'Science & Technology'], 
 250             u
"url":  u
"http://www.youtube.com/watch?v=UxxajLWwzqY", 
 251             u
"file":  u
"UxxajLWwzqY.mp4", 
 252             u
"note": u
"Test generic use_cipher_signature video (#897)", 
 254                 u
"upload_date": u
"20120506", 
 255                 u
"title": u
"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", 
 256                 u
"description": u
"md5:fea86fda2d5a5784273df5c7cc994d9f", 
 257                 u
"uploader": u
"Icona Pop", 
 258                 u
"uploader_id": u
"IconaPop" 
 262             u
"url":  u
"https://www.youtube.com/watch?v=07FYdnEawAQ", 
 263             u
"file":  u
"07FYdnEawAQ.mp4", 
 264             u
"note": u
"Test VEVO video with age protection (#956)", 
 266                 u
"upload_date": u
"20130703", 
 267                 u
"title": u
"Justin Timberlake - Tunnel Vision (Explicit)", 
 268                 u
"description": u
"md5:64249768eec3bc4276236606ea996373", 
 269                 u
"uploader": u
"justintimberlakeVEVO", 
 270                 u
"uploader_id": u
"justintimberlakeVEVO" 
 274             u
"url":  u
"//www.YouTube.com/watch?v=yZIXLfi8CZQ", 
 275             u
"file":  u
"yZIXLfi8CZQ.mp4", 
 276             u
"note": u
"Embed-only video (#1746)", 
 278                 u
"upload_date": u
"20120608", 
 279                 u
"title": u
"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012", 
 280                 u
"description": u
"md5:09b78bd971f1e3e289601dfba15ca4f7", 
 281                 u
"uploader": u
"SET India", 
 282                 u
"uploader_id": u
"setindia" 
 286             u
"url": u
"http://www.youtube.com/watch?v=a9LDPn-MO4I", 
 287             u
"file": u
"a9LDPn-MO4I.m4a", 
 288             u
"note": u
"256k DASH audio (format 141) via DASH manifest", 
 290                 u
"upload_date": "20121002", 
 291                 u
"uploader_id": "8KVIDEO", 
 292                 u
"description": "No description available.", 
 293                 u
"uploader": "8KVIDEO", 
 294                 u
"title": "UHDTV TEST 8K VIDEO.mp4" 
 297                 u
"youtube_include_dash_manifest": True, 
 301         # DASH manifest with encrypted signature 
 303             u
'url': u
'https://www.youtube.com/watch?v=IB3lcPjvWLA', 
 305                 u
'id': u
'IB3lcPjvWLA', 
 307                 u
'title': u
'Afrojack - The Spark ft. Spree Wilson', 
 308                 u
'description': u
'md5:9717375db5a9a3992be4668bbf3bc0a8', 
 309                 u
'uploader': u
'AfrojackVEVO', 
 310                 u
'uploader_id': u
'AfrojackVEVO', 
 311                 u
'upload_date': u
'20131011', 
 314                 u
'youtube_include_dash_manifest': True, 
 322     def suitable(cls
, url
): 
 323         """Receives a URL and returns True if suitable for this IE.""" 
 324         if YoutubePlaylistIE
.suitable(url
): return False 
 325         return re
.match(cls
._VALID
_URL
, url
) is not None 
 327     def __init__(self
, *args
, **kwargs
): 
 328         super(YoutubeIE
, self
).__init
__(*args
, **kwargs
) 
 329         self
._player
_cache 
= {} 
 331     def report_video_info_webpage_download(self
, video_id
): 
 332         """Report attempt to download video info webpage.""" 
 333         self
.to_screen(u
'%s: Downloading video info webpage' % video_id
) 
 335     def report_information_extraction(self
, video_id
): 
 336         """Report attempt to extract video information.""" 
 337         self
.to_screen(u
'%s: Extracting video information' % video_id
) 
 339     def report_unavailable_format(self
, video_id
, format
): 
 340         """Report extracted video URL.""" 
 341         self
.to_screen(u
'%s: Format %s not available' % (video_id
, format
)) 
 343     def report_rtmp_download(self
): 
 344         """Indicate the download will use the RTMP protocol.""" 
 345         self
.to_screen(u
'RTMP download detected') 
 347     def _signature_cache_id(self
, example_sig
): 
 348         """ Return a string representation of a signature """ 
 349         return u
'.'.join(compat_str(len(part
)) for part 
in example_sig
.split('.')) 
 351     def _extract_signature_function(self
, video_id
, player_url
, example_sig
): 
 353             r
'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$', 
 356             raise ExtractorError('Cannot identify player %r' % player_url
) 
 357         player_type 
= id_m
.group('ext') 
 358         player_id 
= id_m
.group('id') 
 360         # Read from filesystem cache 
 361         func_id 
= '%s_%s_%s' % ( 
 362             player_type
, player_id
, self
._signature
_cache
_id
(example_sig
)) 
 363         assert os
.path
.basename(func_id
) == func_id
 
 364         cache_dir 
= get_cachedir(self
._downloader
.params
) 
 366         cache_enabled 
= cache_dir 
is not None 
 368             cache_fn 
= os
.path
.join(os
.path
.expanduser(cache_dir
), 
 372                 with io
.open(cache_fn
, 'r', encoding
='utf-8') as cachef
: 
 373                     cache_spec 
= json
.load(cachef
) 
 374                 return lambda s
: u
''.join(s
[i
] for i 
in cache_spec
) 
 376                 pass  # No cache available 
 378         if player_type 
== 'js': 
 379             code 
= self
._download
_webpage
( 
 380                 player_url
, video_id
, 
 381                 note
=u
'Downloading %s player %s' % (player_type
, player_id
), 
 382                 errnote
=u
'Download of %s failed' % player_url
) 
 383             res 
= self
._parse
_sig
_js
(code
) 
 384         elif player_type 
== 'swf': 
 385             urlh 
= self
._request
_webpage
( 
 386                 player_url
, video_id
, 
 387                 note
=u
'Downloading %s player %s' % (player_type
, player_id
), 
 388                 errnote
=u
'Download of %s failed' % player_url
) 
 390             res 
= self
._parse
_sig
_swf
(code
) 
 392             assert False, 'Invalid player type %r' % player_type
 
 396                 test_string 
= u
''.join(map(compat_chr
, range(len(example_sig
)))) 
 397                 cache_res 
= res(test_string
) 
 398                 cache_spec 
= [ord(c
) for c 
in cache_res
] 
 400                     os
.makedirs(os
.path
.dirname(cache_fn
)) 
 401                 except OSError as ose
: 
 402                     if ose
.errno 
!= errno
.EEXIST
: 
 404                 write_json_file(cache_spec
, cache_fn
) 
 406                 tb 
= traceback
.format_exc() 
 407                 self
._downloader
.report_warning( 
 408                     u
'Writing cache to %r failed: %s' % (cache_fn
, tb
)) 
 412     def _print_sig_code(self
, func
, example_sig
): 
 413         def gen_sig_code(idxs
): 
 414             def _genslice(start
, end
, step
): 
 415                 starts 
= u
'' if start 
== 0 else str(start
) 
 416                 ends 
= (u
':%d' % (end
+step
)) if end 
+ step 
>= 0 else u
':' 
 417                 steps 
= u
'' if step 
== 1 else (u
':%d' % step
) 
 418                 return u
's[%s%s%s]' % (starts
, ends
, steps
) 
 421             start 
= '(Never used)'  # Quelch pyflakes warnings - start will be 
 422                                     # set as soon as step is set 
 423             for i
, prev 
in zip(idxs
[1:], idxs
[:-1]): 
 427                     yield _genslice(start
, prev
, step
) 
 430                 if i 
- prev 
in [-1, 1]: 
 435                     yield u
's[%d]' % prev
 
 439                 yield _genslice(start
, i
, step
) 
 441         test_string 
= u
''.join(map(compat_chr
, range(len(example_sig
)))) 
 442         cache_res 
= func(test_string
) 
 443         cache_spec 
= [ord(c
) for c 
in cache_res
] 
 444         expr_code 
= u
' + '.join(gen_sig_code(cache_spec
)) 
 445         signature_id_tuple 
= '(%s)' % ( 
 446             ', '.join(compat_str(len(p
)) for p 
in example_sig
.split('.'))) 
 447         code 
= (u
'if tuple(len(p) for p in s.split(\'.\')) == %s:\n' 
 448                 u
'    return %s\n') % (signature_id_tuple
, expr_code
) 
 449         self
.to_screen(u
'Extracted signature function:\n' + code
) 
 451     def _parse_sig_js(self
, jscode
): 
 452         funcname 
= self
._search
_regex
( 
 453             r
'signature=([$a-zA-Z]+)', jscode
, 
 454              u
'Initial JS player signature function name') 
 456         jsi 
= JSInterpreter(jscode
) 
 457         initial_function 
= jsi
.extract_function(funcname
) 
 458         return lambda s
: initial_function([s
]) 
 460     def _parse_sig_swf(self
, file_contents
): 
 461         swfi 
= SWFInterpreter(file_contents
) 
 462         TARGET_CLASSNAME 
= u
'SignatureDecipher' 
 463         searched_class 
= swfi
.extract_class(TARGET_CLASSNAME
) 
 464         initial_function 
= swfi
.extract_function(searched_class
, u
'decipher') 
 465         return lambda s
: initial_function([s
]) 
 467     def _decrypt_signature(self
, s
, video_id
, player_url
, age_gate
=False): 
 468         """Turn the encrypted s field into a working signature""" 
 470         if player_url 
is None: 
 471             raise ExtractorError(u
'Cannot decrypt signature without player_url') 
 473         if player_url
.startswith(u
'//'): 
 474             player_url 
= u
'https:' + player_url
 
 476             player_id 
= (player_url
, self
._signature
_cache
_id
(s
)) 
 477             if player_id 
not in self
._player
_cache
: 
 478                 func 
= self
._extract
_signature
_function
( 
 479                     video_id
, player_url
, s
 
 481                 self
._player
_cache
[player_id
] = func
 
 482             func 
= self
._player
_cache
[player_id
] 
 483             if self
._downloader
.params
.get('youtube_print_sig_code'): 
 484                 self
._print
_sig
_code
(func
, s
) 
 486         except Exception as e
: 
 487             tb 
= traceback
.format_exc() 
 488             raise ExtractorError( 
 489                 u
'Signature extraction failed: ' + tb
, cause
=e
) 
 491     def _get_available_subtitles(self
, video_id
, webpage
): 
 493             sub_list 
= self
._download
_webpage
( 
 494                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
, 
 495                 video_id
, note
=False) 
 496         except ExtractorError 
as err
: 
 497             self
._downloader
.report_warning(u
'unable to download video subtitles: %s' % compat_str(err
)) 
 499         lang_list 
= re
.findall(r
'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list
) 
 504             params 
= compat_urllib_parse
.urlencode({ 
 507                 'fmt': self
._downloader
.params
.get('subtitlesformat', 'srt'), 
 508                 'name': unescapeHTML(l
[0]).encode('utf-8'), 
 510             url 
= u
'https://www.youtube.com/api/timedtext?' + params
 
 511             sub_lang_list
[lang
] = url
 
 512         if not sub_lang_list
: 
 513             self
._downloader
.report_warning(u
'video doesn\'t have subtitles') 
 517     def _get_available_automatic_caption(self
, video_id
, webpage
): 
 518         """We need the webpage for getting the captions url, pass it as an 
 519            argument to speed up the process.""" 
 520         sub_format 
= self
._downloader
.params
.get('subtitlesformat', 'srt') 
 521         self
.to_screen(u
'%s: Looking for automatic captions' % video_id
) 
 522         mobj 
= re
.search(r
';ytplayer.config = ({.*?});', webpage
) 
 523         err_msg 
= u
'Couldn\'t find automatic captions for %s' % video_id
 
 525             self
._downloader
.report_warning(err_msg
) 
 527         player_config 
= json
.loads(mobj
.group(1)) 
 529             args 
= player_config
[u
'args'] 
 530             caption_url 
= args
[u
'ttsurl'] 
 531             timestamp 
= args
[u
'timestamp'] 
 532             # We get the available subtitles 
 533             list_params 
= compat_urllib_parse
.urlencode({ 
 538             list_url 
= caption_url 
+ '&' + list_params
 
 539             caption_list 
= self
._download
_xml
(list_url
, video_id
) 
 540             original_lang_node 
= caption_list
.find('track') 
 541             if original_lang_node 
is None or original_lang_node
.attrib
.get('kind') != 'asr' : 
 542                 self
._downloader
.report_warning(u
'Video doesn\'t have automatic captions') 
 544             original_lang 
= original_lang_node
.attrib
['lang_code'] 
 547             for lang_node 
in caption_list
.findall('target'): 
 548                 sub_lang 
= lang_node
.attrib
['lang_code'] 
 549                 params 
= compat_urllib_parse
.urlencode({ 
 550                     'lang': original_lang
, 
 556                 sub_lang_list
[sub_lang
] = caption_url 
+ '&' + params
 
 558         # An extractor error can be raise by the download process if there are 
 559         # no automatic captions but there are subtitles 
 560         except (KeyError, ExtractorError
): 
 561             self
._downloader
.report_warning(err_msg
) 
 565     def extract_id(cls
, url
): 
 566         mobj 
= re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
) 
 568             raise ExtractorError(u
'Invalid URL: %s' % url
) 
 569         video_id 
= mobj
.group(2) 
 572     def _extract_from_m3u8(self
, manifest_url
, video_id
): 
 574         def _get_urls(_manifest
): 
 575             lines 
= _manifest
.split('\n') 
 576             urls 
= filter(lambda l
: l 
and not l
.startswith('#'), 
 579         manifest 
= self
._download
_webpage
(manifest_url
, video_id
, u
'Downloading formats manifest') 
 580         formats_urls 
= _get_urls(manifest
) 
 581         for format_url 
in formats_urls
: 
 582             itag 
= self
._search
_regex
(r
'itag/(\d+?)/', format_url
, 'itag') 
 583             url_map
[itag
] = format_url
 
 586     def _extract_annotations(self
, video_id
): 
 587         url 
= 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
 
 588         return self
._download
_webpage
(url
, video_id
, note
=u
'Searching for annotations.', errnote
=u
'Unable to download video annotations.') 
 590     def _real_extract(self
, url
): 
 592             u
'http' if self
._downloader
.params
.get('prefer_insecure', False) 
 595         # Extract original video URL from URL with redirection, like age verification, using next_url parameter 
 596         mobj 
= re
.search(self
._NEXT
_URL
_RE
, url
) 
 598             url 
= proto 
+ '://www.youtube.com/' + compat_urllib_parse
.unquote(mobj
.group(1)).lstrip('/') 
 599         video_id 
= self
.extract_id(url
) 
 602         url 
= proto 
+ '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
 
 603         video_webpage 
= self
._download
_webpage
(url
, video_id
) 
 605         # Attempt to extract SWF player URL 
 606         mobj 
= re
.search(r
'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
 608             player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
 613         self
.report_video_info_webpage_download(video_id
) 
 614         if re
.search(r
'player-age-gate-content">', video_webpage
) is not None: 
 615             self
.report_age_confirmation() 
 617             # We simulate the access to the video from www.youtube.com/v/{video_id} 
 618             # this can be viewed without login into Youtube 
 619             data 
= compat_urllib_parse
.urlencode({ 
 620                 'video_id': video_id
, 
 621                 'eurl': 'https://youtube.googleapis.com/v/' + video_id
, 
 622                 'sts': self
._search
_regex
( 
 623                     r
'"sts"\s*:\s*(\d+)', video_webpage
, 'sts'), 
 625             video_info_url 
= proto 
+ '://www.youtube.com/get_video_info?' + data
 
 626             video_info_webpage 
= self
._download
_webpage
(video_info_url
, video_id
, 
 628                                     errnote
='unable to download video info webpage') 
 629             video_info 
= compat_parse_qs(video_info_webpage
) 
 632             for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
 633                 video_info_url 
= (proto 
+ '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
 634                         % (video_id
, el_type
)) 
 635                 video_info_webpage 
= self
._download
_webpage
(video_info_url
, video_id
, 
 637                                         errnote
='unable to download video info webpage') 
 638                 video_info 
= compat_parse_qs(video_info_webpage
) 
 639                 if 'token' in video_info
: 
 641         if 'token' not in video_info
: 
 642             if 'reason' in video_info
: 
 643                 raise ExtractorError( 
 644                     u
'YouTube said: %s' % video_info
['reason'][0], 
 645                     expected
=True, video_id
=video_id
) 
 647                 raise ExtractorError( 
 648                     u
'"token" parameter not in video info for unknown reason', 
 651         if 'view_count' in video_info
: 
 652             view_count 
= int(video_info
['view_count'][0]) 
 656         # Check for "rental" videos 
 657         if 'ypc_video_rental_bar_text' in video_info 
and 'author' not in video_info
: 
 658             raise ExtractorError(u
'"rental" videos not supported') 
 660         # Start extracting information 
 661         self
.report_information_extraction(video_id
) 
 664         if 'author' not in video_info
: 
 665             raise ExtractorError(u
'Unable to extract uploader name') 
 666         video_uploader 
= compat_urllib_parse
.unquote_plus(video_info
['author'][0]) 
 669         video_uploader_id 
= None 
 670         mobj 
= re
.search(r
'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage
) 
 672             video_uploader_id 
= mobj
.group(1) 
 674             self
._downloader
.report_warning(u
'unable to extract uploader nickname') 
 677         if 'title' in video_info
: 
 678             video_title 
= video_info
['title'][0] 
 680             self
._downloader
.report_warning(u
'Unable to extract video title') 
 684         # We try first to get a high quality image: 
 685         m_thumb 
= re
.search(r
'<span itemprop="thumbnail".*?href="(.*?)">', 
 686                             video_webpage
, re
.DOTALL
) 
 687         if m_thumb 
is not None: 
 688             video_thumbnail 
= m_thumb
.group(1) 
 689         elif 'thumbnail_url' not in video_info
: 
 690             self
._downloader
.report_warning(u
'unable to extract video thumbnail') 
 691             video_thumbnail 
= None 
 692         else:   # don't panic if we can't find it 
 693             video_thumbnail 
= compat_urllib_parse
.unquote_plus(video_info
['thumbnail_url'][0]) 
 697         mobj 
= re
.search(r
'(?s)id="eow-date.*?>(.*?)</span>', video_webpage
) 
 700                 r
'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>', 
 703             upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
 704             upload_date 
= unified_strdate(upload_date
) 
 706         m_cat_container 
= get_element_by_id("eow-category", video_webpage
) 
 708             category 
= self
._html
_search
_regex
( 
 709                 r
'(?s)<a[^<]+>(.*?)</a>', m_cat_container
, 'category', 
 711             video_categories 
= None if category 
is None else [category
] 
 713             video_categories 
= None 
 716         video_description 
= get_element_by_id("eow-description", video_webpage
) 
 717         if video_description
: 
 718             video_description 
= re
.sub(r
'''(?x) 
 720                     (?:[a-zA-Z-]+="[^"]+"\s+)*? 
 722                     (?:[a-zA-Z-]+="[^"]+"\s+)*? 
 723                     class="yt-uix-redirect-link"\s*> 
 726             ''', r
'\1', video_description
) 
 727             video_description 
= clean_html(video_description
) 
 729             fd_mobj 
= re
.search(r
'<meta name="description" content="([^"]+)"', video_webpage
) 
 731                 video_description 
= unescapeHTML(fd_mobj
.group(1)) 
 733                 video_description 
= u
'' 
 735         def _extract_count(klass
): 
 736             count 
= self
._search
_regex
( 
 737                 r
'class="%s">([\d,]+)</span>' % re
.escape(klass
), 
 738                 video_webpage
, klass
, default
=None) 
 739             if count 
is not None: 
 740                 return int(count
.replace(',', '')) 
 742         like_count 
= _extract_count(u
'likes-count') 
 743         dislike_count 
= _extract_count(u
'dislikes-count') 
 746         video_subtitles 
= self
.extract_subtitles(video_id
, video_webpage
) 
 748         if self
._downloader
.params
.get('listsubtitles', False): 
 749             self
._list
_available
_subtitles
(video_id
, video_webpage
) 
 752         if 'length_seconds' not in video_info
: 
 753             self
._downloader
.report_warning(u
'unable to extract video duration') 
 754             video_duration 
= None 
 756             video_duration 
= int(compat_urllib_parse
.unquote_plus(video_info
['length_seconds'][0])) 
 759         video_annotations 
= None 
 760         if self
._downloader
.params
.get('writeannotations', False): 
 761                 video_annotations 
= self
._extract
_annotations
(video_id
) 
 763         # Decide which formats to download 
 765             mobj 
= re
.search(r
';ytplayer\.config\s*=\s*({.*?});', video_webpage
) 
 767                 raise ValueError('Could not find vevo ID') 
 768             json_code 
= uppercase_escape(mobj
.group(1)) 
 769             ytplayer_config 
= json
.loads(json_code
) 
 770             args 
= ytplayer_config
['args'] 
 771             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map 
 772             # this signatures are encrypted 
 773             if 'url_encoded_fmt_stream_map' not in args
: 
 774                 raise ValueError(u
'No stream_map present')  # caught below 
 775             re_signature 
= re
.compile(r
'[&,]s=') 
 776             m_s 
= re_signature
.search(args
['url_encoded_fmt_stream_map']) 
 778                 self
.to_screen(u
'%s: Encrypted signatures detected.' % video_id
) 
 779                 video_info
['url_encoded_fmt_stream_map'] = [args
['url_encoded_fmt_stream_map']] 
 780             m_s 
= re_signature
.search(args
.get('adaptive_fmts', u
'')) 
 782                 if 'adaptive_fmts' in video_info
: 
 783                     video_info
['adaptive_fmts'][0] += ',' + args
['adaptive_fmts'] 
 785                     video_info
['adaptive_fmts'] = [args
['adaptive_fmts']] 
 789         def _map_to_format_list(urlmap
): 
 791             for itag
, video_real_url 
in urlmap
.items(): 
 794                     'url': video_real_url
, 
 795                     'player_url': player_url
, 
 797                 if itag 
in self
._formats
: 
 798                     dct
.update(self
._formats
[itag
]) 
 802         if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
 803             self
.report_rtmp_download() 
 805                 'format_id': '_rtmp', 
 807                 'url': video_info
['conn'][0], 
 808                 'player_url': player_url
, 
 810         elif len(video_info
.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info
.get('adaptive_fmts', [])) >= 1: 
 811             encoded_url_map 
= video_info
.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info
.get('adaptive_fmts',[''])[0] 
 812             if 'rtmpe%3Dyes' in encoded_url_map
: 
 813                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected
=True) 
 815             for url_data_str 
in encoded_url_map
.split(','): 
 816                 url_data 
= compat_parse_qs(url_data_str
) 
 817                 if 'itag' not in url_data 
or 'url' not in url_data
: 
 819                 format_id 
= url_data
['itag'][0] 
 820                 url 
= url_data
['url'][0] 
 822                 if 'sig' in url_data
: 
 823                     url 
+= '&signature=' + url_data
['sig'][0] 
 824                 elif 's' in url_data
: 
 825                     encrypted_sig 
= url_data
['s'][0] 
 828                         jsplayer_url_json 
= self
._search
_regex
( 
 829                             r
'"assets":.+?"js":\s*("[^"]+")', 
 830                             video_webpage
, u
'JS player URL') 
 831                         player_url 
= json
.loads(jsplayer_url_json
) 
 832                     if player_url 
is None: 
 833                         player_url_json 
= self
._search
_regex
( 
 834                             r
'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', 
 835                             video_webpage
, u
'age gate player URL') 
 836                         player_url 
= json
.loads(player_url_json
) 
 838                     if self
._downloader
.params
.get('verbose'): 
 839                         if player_url 
is None: 
 840                             player_version 
= 'unknown' 
 841                             player_desc 
= 'unknown' 
 843                             if player_url
.endswith('swf'): 
 844                                 player_version 
= self
._search
_regex
( 
 845                                     r
'-(.+?)(?:/watch_as3)?\.swf$', player_url
, 
 846                                     u
'flash player', fatal
=False) 
 847                                 player_desc 
= 'flash player %s' % player_version
 
 849                                 player_version 
= self
._search
_regex
( 
 850                                     r
'html5player-([^/]+?)(?:/html5player)?\.js', 
 852                                     'html5 player', fatal
=False) 
 853                                 player_desc 
= u
'html5 player %s' % player_version
 
 855                         parts_sizes 
= self
._signature
_cache
_id
(encrypted_sig
) 
 856                         self
.to_screen(u
'{%s} signature length %s, %s' % 
 857                             (format_id
, parts_sizes
, player_desc
)) 
 859                     signature 
= self
._decrypt
_signature
( 
 860                         encrypted_sig
, video_id
, player_url
, age_gate
) 
 861                     url 
+= '&signature=' + signature
 
 862                 if 'ratebypass' not in url
: 
 863                     url 
+= '&ratebypass=yes' 
 864                 url_map
[format_id
] = url
 
 865             formats 
= _map_to_format_list(url_map
) 
 866         elif video_info
.get('hlsvp'): 
 867             manifest_url 
= video_info
['hlsvp'][0] 
 868             url_map 
= self
._extract
_from
_m
3u8(manifest_url
, video_id
) 
 869             formats 
= _map_to_format_list(url_map
) 
 871             raise ExtractorError(u
'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') 
 873         # Look for the DASH manifest 
 874         if (self
._downloader
.params
.get('youtube_include_dash_manifest', False)): 
 876                 # The DASH manifest used needs to be the one from the original video_webpage. 
 877                 # The one found in get_video_info seems to be using different signatures. 
 878                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. 
 879                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the 
 880                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. 
 882                     dash_manifest_url 
= video_info
.get('dashmpd')[0] 
 884                     dash_manifest_url 
= ytplayer_config
['args']['dashmpd'] 
 885                 def decrypt_sig(mobj
): 
 887                     dec_s 
= self
._decrypt
_signature
(s
, video_id
, player_url
, age_gate
) 
 888                     return '/signature/%s' % dec_s
 
 889                 dash_manifest_url 
= re
.sub(r
'/s/([\w\.]+)', decrypt_sig
, dash_manifest_url
) 
 890                 dash_doc 
= self
._download
_xml
( 
 891                     dash_manifest_url
, video_id
, 
 892                     note
=u
'Downloading DASH manifest', 
 893                     errnote
=u
'Could not download DASH manifest') 
 894                 for r 
in dash_doc
.findall(u
'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): 
 895                     url_el 
= r
.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') 
 898                     format_id 
= r
.attrib
['id'] 
 899                     video_url 
= url_el
.text
 
 900                     filesize 
= int_or_none(url_el
.attrib
.get('{http://youtube.com/yt/2012/10/10}contentLength')) 
 902                         'format_id': format_id
, 
 904                         'width': int_or_none(r
.attrib
.get('width')), 
 905                         'tbr': int_or_none(r
.attrib
.get('bandwidth'), 1000), 
 906                         'asr': int_or_none(r
.attrib
.get('audioSamplingRate')), 
 907                         'filesize': filesize
, 
 910                         existing_format 
= next( 
 912                             if fo
['format_id'] == format_id
) 
 913                     except StopIteration: 
 914                         f
.update(self
._formats
.get(format_id
, {})) 
 917                         existing_format
.update(f
) 
 919             except (ExtractorError
, KeyError) as e
: 
 920                 self
.report_warning(u
'Skipping DASH manifest: %s' % e
, video_id
) 
 922         self
._sort
_formats
(formats
) 
 926             'uploader':     video_uploader
, 
 927             'uploader_id':  video_uploader_id
, 
 928             'upload_date':  upload_date
, 
 929             'title':        video_title
, 
 930             'thumbnail':    video_thumbnail
, 
 931             'description':  video_description
, 
 932             'categories':   video_categories
, 
 933             'subtitles':    video_subtitles
, 
 934             'duration':     video_duration
, 
 935             'age_limit':    18 if age_gate 
else 0, 
 936             'annotations':  video_annotations
, 
 937             'webpage_url': proto 
+ '://www.youtube.com/watch?v=%s' % video_id
, 
 938             'view_count':   view_count
, 
 939             'like_count': like_count
, 
 940             'dislike_count': dislike_count
, 
 944 class YoutubePlaylistIE(YoutubeBaseInfoExtractor
): 
 945     IE_DESC 
= u
'YouTube.com playlists' 
 946     _VALID_URL 
= r
"""(?x)(?: 
 951                            (?:course|view_play_list|my_playlists|artist|playlist|watch) 
 952                            \? (?:.*?&)*? (?:p|a|list)= 
 956                             (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} 
 957                             # Top tracks, they can also include dots  
 962                         ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) 
 964     _TEMPLATE_URL 
= 'https://www.youtube.com/playlist?list=%s' 
 965     _MORE_PAGES_INDICATOR 
= r
'data-link-type="next"' 
 966     _VIDEO_RE 
= r
'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' 
 967     IE_NAME 
= u
'youtube:playlist' 
 969     def _real_initialize(self
): 
 972     def _ids_to_results(self
, ids
): 
 973         return [self
.url_result(vid_id
, 'Youtube', video_id
=vid_id
) 
 976     def _extract_mix(self
, playlist_id
): 
 977         # The mixes are generated from a a single video 
 978         # the id of the playlist is just 'RD' + video_id 
 979         url 
= 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id
[-11:], playlist_id
) 
 980         webpage 
= self
._download
_webpage
(url
, playlist_id
, u
'Downloading Youtube mix') 
 981         search_title 
= lambda class_name
: get_element_by_attribute('class', class_name
, webpage
) 
 982         title_span 
= (search_title('playlist-title') or 
 983             search_title('title long-title') or search_title('title')) 
 984         title 
= clean_html(title_span
) 
 985         video_re 
= r
'''(?x)data-video-username=".*?".*? 
 986                        href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re
.escape(playlist_id
) 
 987         ids 
= orderedSet(re
.findall(video_re
, webpage
, flags
=re
.DOTALL
)) 
 988         url_results 
= self
._ids
_to
_results
(ids
) 
 990         return self
.playlist_result(url_results
, playlist_id
, title
) 
 992     def _real_extract(self
, url
): 
 993         # Extract playlist id 
 994         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 996             raise ExtractorError(u
'Invalid URL: %s' % url
) 
 997         playlist_id 
= mobj
.group(1) or mobj
.group(2) 
 999         # Check if it's a video-specific URL 
1000         query_dict 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
) 
1001         if 'v' in query_dict
: 
1002             video_id 
= query_dict
['v'][0] 
1003             if self
._downloader
.params
.get('noplaylist'): 
1004                 self
.to_screen(u
'Downloading just video %s because of --no-playlist' % video_id
) 
1005                 return self
.url_result(video_id
, 'Youtube', video_id
=video_id
) 
1007                 self
.to_screen(u
'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id
, video_id
)) 
1009         if playlist_id
.startswith('RD'): 
1010             # Mixes require a custom extraction process 
1011             return self
._extract
_mix
(playlist_id
) 
1012         if playlist_id
.startswith('TL'): 
1013             raise ExtractorError(u
'For downloading YouTube.com top lists, use ' 
1014                 u
'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected
=True) 
1016         url 
= self
._TEMPLATE
_URL 
% playlist_id
 
1017         page 
= self
._download
_webpage
(url
, playlist_id
) 
1018         more_widget_html 
= content_html 
= page
 
1020         # Check if the playlist exists or is private 
1021         if re
.search(r
'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page
) is not None: 
1022             raise ExtractorError( 
1023                 u
'The playlist doesn\'t exist or is private, use --username or ' 
1024                 '--netrc to access it.', 
1027         # Extract the video ids from the playlist pages 
1030         for page_num 
in itertools
.count(1): 
1031             matches 
= re
.finditer(self
._VIDEO
_RE
, content_html
) 
1032             # We remove the duplicates and the link with index 0 
1033             # (it's not the first video of the playlist) 
1034             new_ids 
= orderedSet(m
.group('id') for m 
in matches 
if m
.group('index') != '0') 
1037             mobj 
= re
.search(r
'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html
) 
1041             more 
= self
._download
_json
( 
1042                 'https://youtube.com/%s' % mobj
.group('more'), playlist_id
, 
1043                 'Downloading page #%s' % page_num
, 
1044                 transform_source
=uppercase_escape
) 
1045             content_html 
= more
['content_html'] 
1046             more_widget_html 
= more
['load_more_widget_html'] 
1048         playlist_title 
= self
._html
_search
_regex
( 
1049             r
'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', 
1052         url_results 
= self
._ids
_to
_results
(ids
) 
1053         return self
.playlist_result(url_results
, playlist_id
, playlist_title
) 
class YoutubeTopListIE(YoutubePlaylistIE):
    """Extractor for YouTube "top list" charts, addressed via a keyword."""
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')
        # Locate the playlist link on the channel page by its url-encoded title.
        query = compat_urllib_parse.urlencode({'title': title})
        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(playlist_re, channel_page, u'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        # Sometimes the webpage doesn't contain the videos;
        # retry until we get them.
        for attempt in itertools.count(0):
            msg = u'Downloading Youtube mix'
            if attempt > 0:
                msg += ', retry #%d' % attempt
            webpage = self._download_webpage(url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_title=title)
class YoutubeChannelIE(InfoExtractor):
    """Extractor for YouTube.com channel pages."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Return the unique video ids linked from a channel page, in order."""
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id.
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page.
        channel_id = mobj.group(1)
        video_ids = []
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        # Auto-generated channels are marked in the page markup.
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page;
            # the ajax pages can't be used, they are empty.
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query.
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_json(
                    url, channel_id, note=u'Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor):
    """Extractor for a user's uploaded videos, via the GData API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    # The GData API caps results per request, so uploads are fetched page by page.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor; the regex is too permissive and it would match otherwise.
        other_ies = iter(klass for (name, klass) in globals().items()
                         if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract the username.
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids left.

        def download_page(pagenum):
            # Yield url_result dicts for one API page (1-based start index).
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                u'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # No more uploads: stop paging.
                return

            # Extract video identifiers.
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor backed by the GData JSON-C API ("ytsearch" keyword)."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n
        PAGE_SIZE = 50

        while (PAGE_SIZE * pagenum) < limit:
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
                errnote=u'Unable to download API page')
            data = json.loads(data_json)
            api_response = data['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    u'[youtube] No video results', expected=True)

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # The API reports the real total; never request past it.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same search as YoutubeSearchIE, but results ordered by upload date."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = u'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Identical to the parent API URL apart from the orderby=published suffix.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for YouTube search-results URLs (scrapes the results page)."""
    IE_DESC = u'YouTube.com search URLs'
    IE_NAME = u'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(mobj.group('query'))

        webpage = self._download_webpage(url, query)
        # Narrow the scrape down to the result list before parsing items.
        result_code = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')

        part_codes = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
        entries = []
        for part_code in part_codes:
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            part_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet)
            entries.append({
                '_type': 'url',
                'url': part_url,
                'title': part_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
class YoutubeShowIE(InfoExtractor):
    """Extractor for YouTube show pages; yields one playlist per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            results.append(self.url_result(
                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Use action_load_personal_feed instead of action_load_system_feed.
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # URL template with one remaining %s slot for the paging token.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            # Different feed responses carry the HTML under different keys.
            feed_html = info.get('feed_html') or info.get('content_html')
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in ids)
            # Follow the "load more" widget until it disappears.
            mobj = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                info['load_more_widget_html'])
            if mobj is None:
                break
            paging = mobj.group('paging')
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions."""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Feed name plugged into the feed_ajax URL by the base class.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's recommended videos."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    # Feed name plugged into the feed_ajax URL by the base class.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's Watch Later list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    # Feed name plugged into the feed_ajax URL by the base class.
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch Later is per-user, so the personal-feed action must be used.
    _PERSONAL_FEED = True
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's watch history."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Fix: use a raw string for the regex, consistent with every sibling
    # extractor.  The previous u'...' literal only produced the intended
    # pattern because '\.' is not a recognized escape sequence and is left
    # unchanged; the resulting string value is byte-identical.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    # Feed name plugged into the feed_ajax URL by the base class.
    _FEED_NAME = 'history'
    # History is per-user, so the personal-feed action must be used.
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of the playlist that backs it;
        # hand the actual extraction over to YoutubePlaylistIE.
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
1390 class YoutubeTruncatedURLIE(InfoExtractor
): 
1391     IE_NAME 
= 'youtube:truncated_url' 
1392     IE_DESC 
= False  # Do not list 
1393     _VALID_URL 
= r
'''(?x) 
1394         (?:https?://)?[^/]+/watch\?(?: 
1396             annotation_id=annotation_[^&]+ 
1398         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ 
1402         'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', 
1403         'only_matching': True, 
1405         'url': 'http://www.youtube.com/watch?', 
1406         'only_matching': True, 
1409     def _real_extract(self
, url
): 
1410         raise ExtractorError( 
1411             u
'Did you forget to quote the URL? Remember that & is a meta ' 
1412             u
'character in most shells, so you want to put the URL in quotes, ' 
1414             u
'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' 
1415             u
' or simply  youtube-dl BaW_jenozKc  .',