3 from __future__ 
import unicode_literals
 
  12 from .common 
import InfoExtractor
, SearchInfoExtractor
 
  13 from .subtitles 
import SubtitlesInfoExtractor
 
  14 from ..jsinterp 
import JSInterpreter
 
  15 from ..swfinterp 
import SWFInterpreter
 
  20     compat_urllib_request
, 
  26     get_element_by_attribute
, 
  36 class YoutubeBaseInfoExtractor(InfoExtractor
): 
  37     """Provide base functions for Youtube extractors""" 
  38     _LOGIN_URL 
= 'https://accounts.google.com/ServiceLogin' 
  39     _TWOFACTOR_URL 
= 'https://accounts.google.com/SecondFactor' 
  40     _LANG_URL 
= r
'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
  41     _AGE_URL 
= 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
  42     _NETRC_MACHINE 
= 'youtube' 
  43     # If True it will raise an error if no login info is provided 
  44     _LOGIN_REQUIRED 
= False 
  46     def _set_language(self
): 
  47         return bool(self
._download
_webpage
( 
  49             note
='Setting language', errnote
='unable to set language', 
  54         Attempt to log in to YouTube. 
  55         True is returned if successful or skipped. 
  56         False is returned if login failed. 
  58         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised. 
  60         (username
, password
) = self
._get
_login
_info
() 
  61         # No authentication to be performed 
  63             if self
._LOGIN
_REQUIRED
: 
  64                 raise ExtractorError('No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True) 
  67         login_page 
= self
._download
_webpage
( 
  68             self
._LOGIN
_URL
, None, 
  69             note
='Downloading login page', 
  70             errnote
='unable to fetch login page', fatal
=False) 
  71         if login_page 
is False: 
  74         galx 
= self
._search
_regex
(r
'(?s)<input.+?name="GALX".+?value="(.+?)"', 
  75                                   login_page
, 'Login GALX parameter') 
  79                 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', 
  84                 'PersistentCookie': 'yes', 
  86                 'bgresponse': 'js_disabled', 
  87                 'checkConnection': '', 
  88                 'checkedDomains': 'youtube', 
 100         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode 
 102         login_form 
= dict((k
.encode('utf-8'), v
.encode('utf-8')) for k
,v 
in login_form_strs
.items()) 
 103         login_data 
= compat_urllib_parse
.urlencode(login_form
).encode('ascii') 
 105         req 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, login_data
) 
 106         login_results 
= self
._download
_webpage
( 
 108             note
='Logging in', errnote
='unable to log in', fatal
=False) 
 109         if login_results 
is False: 
 112         if re
.search(r
'id="errormsg_0_Passwd"', login_results
) is not None: 
 113             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected
=True) 
 116         # TODO add SMS and phone call support - these require making a request and then prompting the user 
 118         if re
.search(r
'(?i)<form[^>]* id="gaia_secondfactorform"', login_results
) is not None: 
 119             tfa_code 
= self
._get
_tfa
_info
() 
 122                 self
._downloader
.report_warning('Two-factor authentication required. Provide it with --twofactor <code>') 
 123                 self
._downloader
.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)') 
 126             # Unlike the first login form, secTok and timeStmp are both required for the TFA form 
 128             match 
= re
.search(r
'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) 
 130                 self._downloader.report_warning('Failed to get secTok 
- did the page structure change?
') 
 131             secTok = match.group(1) 
 132             match = re.search(r'id="timeStmp"\n\s
+value
=\'(.+)\'/>', login_results, re.M | re.U) 
 134                 self._downloader.report_warning('Failed to get timeStmp 
- did the page structure change?
') 
 135             timeStmp = match.group(1) 
 138                 'continue': 'https
://www
.youtube
.com
/signin?action_handle_signin
=true
&feature
=sign_in_button
&hl
=en_US
&nomobiletemp
=1', 
 140                 'smsUserPin
': tfa_code, 
 141                 'smsVerifyPin
': 'Verify
', 
 143                 'PersistentCookie
': 'yes
', 
 144                 'checkConnection
': '', 
 145                 'checkedDomains
': 'youtube
', 
 148                 'timeStmp
': timeStmp, 
 149                 'service
': 'youtube
', 
 152             tfa_form = dict((k.encode('utf
-8'), v.encode('utf
-8')) for k,v in tfa_form_strs.items()) 
 153             tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii
') 
 155             tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data) 
 156             tfa_results = self._download_webpage( 
 158                 note='Submitting TFA code
', errnote='unable to submit tfa
', fatal=False) 
 160             if tfa_results is False: 
 163             if re.search(r'(?i
)<form
[^
>]* id="gaia_secondfactorform"', tfa_results) is not None: 
 164                 self._downloader.report_warning('Two
-factor code expired
. Please 
try again
, or use a one
-use backup code instead
.') 
 166             if re.search(r'(?i
)<form
[^
>]* id="gaia_loginform"', tfa_results) is not None: 
 167                 self._downloader.report_warning('unable to log 
in - did the page structure change?
') 
 169             if re.search(r'smsauth
-interstitial
-reviewsettings
', tfa_results) is not None: 
 170                 self._downloader.report_warning('Your Google account has a security notice
. Please log 
in on your web browser
, resolve the notice
, and try again
.') 
 173         if re.search(r'(?i
)<form
[^
>]* id="gaia_loginform"', login_results) is not None: 
 174             self._downloader.report_warning('unable to log 
in: bad username 
or password
') 
 178     def _confirm_age(self): 
 181             'action_confirm
': 'Confirm
', 
 183         req = compat_urllib_request.Request(self._AGE_URL, 
 184             compat_urllib_parse.urlencode(age_form).encode('ascii
')) 
 186         self._download_webpage( 
 188             note='Confirming age
', errnote='Unable to confirm age
', 
 191     def _real_initialize(self): 
 192         if self._downloader is None: 
 194         if self._get_login_info()[0] is not None: 
 195             if not self._set_language(): 
 197         if not self._login(): 
 202 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 
 203     IE_DESC = 'YouTube
.com
' 
 204     _VALID_URL = r"""(?x)^ 
 206                          (?:https?://|//)                                    # http(s):// or protocol-independent URL 
 207                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| 
 208                             (?:www\.)?deturl\.com/www\.youtube\.com/| 
 209                             (?:www\.)?pwnyoutube\.com/| 
 210                             (?:www\.)?yourepeat\.com/| 
 211                             tube\.majestyc\.net/| 
 212                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains 
 213                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls 
 214                          (?:                                                  # the various things that can precede the ID: 
 215                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/ 
 216                              |(?:                                             # or the v= param in all its forms 
 217                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx) 
 218                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! 
 219                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) 
 223                          |youtu\.be/                                          # just youtu.be/xxxx 
 224                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= 
 226                      )?                                                       # all until now is optional -> you can pass the naked ID 
 227                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID 
 228                      (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE 
 229                      (?(1).+)?                                                # if we found the ID, everything can follow 
 231     _NEXT_URL_RE = r'[\?&]next_url
=([^
&]+)' 
 233         '5': {'ext
': 'flv
', 'width
': 400, 'height
': 240}, 
 234         '6': {'ext
': 'flv
', 'width
': 450, 'height
': 270}, 
 235         '13': {'ext
': '3gp
'}, 
 236         '17': {'ext
': '3gp
', 'width
': 176, 'height
': 144}, 
 237         '18': {'ext
': 'mp4
', 'width
': 640, 'height
': 360}, 
 238         '22': {'ext
': 'mp4
', 'width
': 1280, 'height
': 720}, 
 239         '34': {'ext
': 'flv
', 'width
': 640, 'height
': 360}, 
 240         '35': {'ext
': 'flv
', 'width
': 854, 'height
': 480}, 
 241         '36': {'ext
': '3gp
', 'width
': 320, 'height
': 240}, 
 242         '37': {'ext
': 'mp4
', 'width
': 1920, 'height
': 1080}, 
 243         '38': {'ext
': 'mp4
', 'width
': 4096, 'height
': 3072}, 
 244         '43': {'ext
': 'webm
', 'width
': 640, 'height
': 360}, 
 245         '44': {'ext
': 'webm
', 'width
': 854, 'height
': 480}, 
 246         '45': {'ext
': 'webm
', 'width
': 1280, 'height
': 720}, 
 247         '46': {'ext
': 'webm
', 'width
': 1920, 'height
': 1080}, 
 251         '82': {'ext
': 'mp4
', 'height
': 360, 'format_note
': '3D
', 'preference
': -20}, 
 252         '83': {'ext
': 'mp4
', 'height
': 480, 'format_note
': '3D
', 'preference
': -20}, 
 253         '84': {'ext
': 'mp4
', 'height
': 720, 'format_note
': '3D
', 'preference
': -20}, 
 254         '85': {'ext
': 'mp4
', 'height
': 1080, 'format_note
': '3D
', 'preference
': -20}, 
 255         '100': {'ext
': 'webm
', 'height
': 360, 'format_note
': '3D
', 'preference
': -20}, 
 256         '101': {'ext
': 'webm
', 'height
': 480, 'format_note
': '3D
', 'preference
': -20}, 
 257         '102': {'ext
': 'webm
', 'height
': 720, 'format_note
': '3D
', 'preference
': -20}, 
 259         # Apple HTTP Live Streaming 
 260         '92': {'ext
': 'mp4
', 'height
': 240, 'format_note
': 'HLS
', 'preference
': -10}, 
 261         '93': {'ext
': 'mp4
', 'height
': 360, 'format_note
': 'HLS
', 'preference
': -10}, 
 262         '94': {'ext
': 'mp4
', 'height
': 480, 'format_note
': 'HLS
', 'preference
': -10}, 
 263         '95': {'ext
': 'mp4
', 'height
': 720, 'format_note
': 'HLS
', 'preference
': -10}, 
 264         '96': {'ext
': 'mp4
', 'height
': 1080, 'format_note
': 'HLS
', 'preference
': -10}, 
 265         '132': {'ext
': 'mp4
', 'height
': 240, 'format_note
': 'HLS
', 'preference
': -10}, 
 266         '151': {'ext
': 'mp4
', 'height
': 72, 'format_note
': 'HLS
', 'preference
': -10}, 
 269         '133': {'ext
': 'mp4
', 'height
': 240, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 270         '134': {'ext
': 'mp4
', 'height
': 360, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 271         '135': {'ext
': 'mp4
', 'height
': 480, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 272         '136': {'ext
': 'mp4
', 'height
': 720, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 273         '137': {'ext
': 'mp4
', 'height
': 1080, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 274         '138': {'ext
': 'mp4
', 'height
': 2160, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 275         '160': {'ext
': 'mp4
', 'height
': 144, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 276         '264': {'ext
': 'mp4
', 'height
': 1440, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 277         '298': {'ext
': 'mp4
', 'height
': 720, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40, 'fps
': 60, 'vcodec
': 'h264
'}, 
 278         '299': {'ext
': 'mp4
', 'height
': 1080, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40, 'fps
': 60, 'vcodec
': 'h264
'}, 
 279         '266': {'ext
': 'mp4
', 'height
': 2160, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40, 'vcodec
': 'h264
'}, 
 282         '139': {'ext
': 'm4a
', 'format_note
': 'DASH audio
', 'vcodec
': 'none
', 'abr
': 48, 'preference
': -50}, 
 283         '140': {'ext
': 'm4a
', 'format_note
': 'DASH audio
', 'vcodec
': 'none
', 'abr
': 128, 'preference
': -50}, 
 284         '141': {'ext
': 'm4a
', 'format_note
': 'DASH audio
', 'vcodec
': 'none
', 'abr
': 256, 'preference
': -50}, 
 287         '167': {'ext
': 'webm
', 'height
': 360, 'width
': 640, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'container
': 'webm
', 'vcodec
': 'VP8
', 'preference
': -40}, 
 288         '168': {'ext
': 'webm
', 'height
': 480, 'width
': 854, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'container
': 'webm
', 'vcodec
': 'VP8
', 'preference
': -40}, 
 289         '169': {'ext
': 'webm
', 'height
': 720, 'width
': 1280, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'container
': 'webm
', 'vcodec
': 'VP8
', 'preference
': -40}, 
 290         '170': {'ext
': 'webm
', 'height
': 1080, 'width
': 1920, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'container
': 'webm
', 'vcodec
': 'VP8
', 'preference
': -40}, 
 291         '218': {'ext
': 'webm
', 'height
': 480, 'width
': 854, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'container
': 'webm
', 'vcodec
': 'VP8
', 'preference
': -40}, 
 292         '219': {'ext
': 'webm
', 'height
': 480, 'width
': 854, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'container
': 'webm
', 'vcodec
': 'VP8
', 'preference
': -40}, 
 293         '278': {'ext
': 'webm
', 'height
': 144, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40, 'container
': 'webm
', 'vcodec
': 'VP9
'}, 
 294         '242': {'ext
': 'webm
', 'height
': 240, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 295         '243': {'ext
': 'webm
', 'height
': 360, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 296         '244': {'ext
': 'webm
', 'height
': 480, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 297         '245': {'ext
': 'webm
', 'height
': 480, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 298         '246': {'ext
': 'webm
', 'height
': 480, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 299         '247': {'ext
': 'webm
', 'height
': 720, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 300         '248': {'ext
': 'webm
', 'height
': 1080, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 301         '271': {'ext
': 'webm
', 'height
': 1440, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 302         '272': {'ext
': 'webm
', 'height
': 2160, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40}, 
 303         '302': {'ext
': 'webm
', 'height
': 720, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40, 'fps
': 60, 'vcodec
': 'VP9
'}, 
 304         '303': {'ext
': 'webm
', 'height
': 1080, 'format_note
': 'DASH video
', 'acodec
': 'none
', 'preference
': -40, 'fps
': 60, 'vcodec
': 'VP9
'}, 
 307         '171': {'ext
': 'webm
', 'vcodec
': 'none
', 'format_note
': 'DASH audio
', 'abr
': 128, 'preference
': -50}, 
 308         '172': {'ext
': 'webm
', 'vcodec
': 'none
', 'format_note
': 'DASH audio
', 'abr
': 256, 'preference
': -50}, 
 310         # Dash webm audio with opus inside 
 311         '249': {'ext
': 'webm
', 'vcodec
': 'none
', 'format_note
': 'DASH audio
', 'acodec
': 'opus
', 'abr
': 50, 'preference
': -50}, 
 312         '250': {'ext
': 'webm
', 'vcodec
': 'none
', 'format_note
': 'DASH audio
', 'acodec
': 'opus
', 'abr
': 70, 'preference
': -50}, 
 313         '251': {'ext
': 'webm
', 'vcodec
': 'none
', 'format_note
': 'DASH audio
', 'acodec
': 'opus
', 'abr
': 160, 'preference
': -50}, 
 316         '_rtmp
': {'protocol
': 'rtmp
'}, 
 322             'url
': 'http
://www
.youtube
.com
/watch?v
=BaW_jenozKc
', 
 326                 'title
': 'youtube
-dl test video 
"\'/\\ä↭𝕐', 
 327                 'uploader': 'Philipp Hagemeister', 
 328                 'uploader_id': 'phihag', 
 329                 'upload_date': '20121002', 
 330                 'description': 'test chars:  "\'/\\ä↭𝕐
\ntest URL
: https
://github
.com
/rg3
/youtube
-dl
/issues
/1892\n\nThis 
is a test video 
for youtube
-dl
.\n\nFor more information
, contact phihag
@phihag.de .', 
 331                 'categories
': ['Science 
& Technology
'], 
 333                 'dislike_count
': int, 
 337             'url
': 'http
://www
.youtube
.com
/watch?v
=UxxajLWwzqY
', 
 338             'note
': 'Test generic use_cipher_signature 
video (#897)', 
 342                 'upload_date': '20120506', 
 343                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', 
 344                 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f', 
 345                 'uploader': 'Icona Pop', 
 346                 'uploader_id': 'IconaPop', 
 350             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ', 
 351             'note': 'Test VEVO video with age protection (#956)', 
 355                 'upload_date': '20130703', 
 356                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 
 357                 'description': 'md5:64249768eec3bc4276236606ea996373', 
 358                 'uploader': 'justintimberlakeVEVO', 
 359                 'uploader_id': 'justintimberlakeVEVO', 
 363             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', 
 364             'note': 'Embed-only video (#1746)', 
 368                 'upload_date': '20120608', 
 369                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', 
 370                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', 
 371                 'uploader': 'SET India', 
 372                 'uploader_id': 'setindia' 
 376             'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', 
 377             'note': '256k DASH audio (format 141) via DASH manifest', 
 381                 'upload_date': '20121002', 
 382                 'uploader_id': '8KVIDEO', 
 384                 'uploader': '8KVIDEO', 
 385                 'title': 'UHDTV TEST 8K VIDEO.mp4' 
 388                 'youtube_include_dash_manifest': True, 
 392         # DASH manifest with encrypted signature 
 394             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA', 
 398                 'title': 'Afrojack - The Spark ft. Spree Wilson', 
 399                 'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8', 
 400                 'uploader': 'AfrojackVEVO', 
 401                 'uploader_id': 'AfrojackVEVO', 
 402                 'upload_date': '20131011', 
 405                 'youtube_include_dash_manifest': True, 
 411     def __init__(self
, *args
, **kwargs
): 
 412         super(YoutubeIE
, self
).__init
__(*args
, **kwargs
) 
 413         self
._player
_cache 
= {} 
 415     def report_video_info_webpage_download(self
, video_id
): 
 416         """Report attempt to download video info webpage.""" 
 417         self
.to_screen('%s: Downloading video info webpage' % video_id
) 
 419     def report_information_extraction(self
, video_id
): 
 420         """Report attempt to extract video information.""" 
 421         self
.to_screen('%s: Extracting video information' % video_id
) 
 423     def report_unavailable_format(self
, video_id
, format
): 
 424         """Report extracted video URL.""" 
 425         self
.to_screen('%s: Format %s not available' % (video_id
, format
)) 
 427     def report_rtmp_download(self
): 
 428         """Indicate the download will use the RTMP protocol.""" 
 429         self
.to_screen('RTMP download detected') 
 431     def _signature_cache_id(self
, example_sig
): 
 432         """ Return a string representation of a signature """ 
 433         return '.'.join(compat_str(len(part
)) for part 
in example_sig
.split('.')) 
 435     def _extract_signature_function(self
, video_id
, player_url
, example_sig
): 
 437             r
'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$', 
 440             raise ExtractorError('Cannot identify player %r' % player_url
) 
 441         player_type 
= id_m
.group('ext') 
 442         player_id 
= id_m
.group('id') 
 444         # Read from filesystem cache 
 445         func_id 
= '%s_%s_%s' % ( 
 446             player_type
, player_id
, self
._signature
_cache
_id
(example_sig
)) 
 447         assert os
.path
.basename(func_id
) == func_id
 
 449         cache_spec 
= self
._downloader
.cache
.load('youtube-sigfuncs', func_id
) 
 450         if cache_spec 
is not None: 
 451             return lambda s
: ''.join(s
[i
] for i 
in cache_spec
) 
 453         if player_type 
== 'js': 
 454             code 
= self
._download
_webpage
( 
 455                 player_url
, video_id
, 
 456                 note
='Downloading %s player %s' % (player_type
, player_id
), 
 457                 errnote
='Download of %s failed' % player_url
) 
 458             res 
= self
._parse
_sig
_js
(code
) 
 459         elif player_type 
== 'swf': 
 460             urlh 
= self
._request
_webpage
( 
 461                 player_url
, video_id
, 
 462                 note
='Downloading %s player %s' % (player_type
, player_id
), 
 463                 errnote
='Download of %s failed' % player_url
) 
 465             res 
= self
._parse
_sig
_swf
(code
) 
 467             assert False, 'Invalid player type %r' % player_type
 
 469         if cache_spec 
is None: 
 470             test_string 
= ''.join(map(compat_chr
, range(len(example_sig
)))) 
 471             cache_res 
= res(test_string
) 
 472             cache_spec 
= [ord(c
) for c 
in cache_res
] 
 474         self
._downloader
.cache
.store('youtube-sigfuncs', func_id
, cache_spec
) 
 477     def _print_sig_code(self
, func
, example_sig
): 
 478         def gen_sig_code(idxs
): 
 479             def _genslice(start
, end
, step
): 
 480                 starts 
= '' if start 
== 0 else str(start
) 
 481                 ends 
= (':%d' % (end
+step
)) if end 
+ step 
>= 0 else ':' 
 482                 steps 
= '' if step 
== 1 else (':%d' % step
) 
 483                 return 's[%s%s%s]' % (starts
, ends
, steps
) 
 486             start 
= '(Never used)'  # Quelch pyflakes warnings - start will be 
 487                                     # set as soon as step is set 
 488             for i
, prev 
in zip(idxs
[1:], idxs
[:-1]): 
 492                     yield _genslice(start
, prev
, step
) 
 495                 if i 
- prev 
in [-1, 1]: 
 504                 yield _genslice(start
, i
, step
) 
 506         test_string 
= ''.join(map(compat_chr
, range(len(example_sig
)))) 
 507         cache_res 
= func(test_string
) 
 508         cache_spec 
= [ord(c
) for c 
in cache_res
] 
 509         expr_code 
= ' + '.join(gen_sig_code(cache_spec
)) 
 510         signature_id_tuple 
= '(%s)' % ( 
 511             ', '.join(compat_str(len(p
)) for p 
in example_sig
.split('.'))) 
 512         code 
= ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' 
 513                 '    return %s\n') % (signature_id_tuple
, expr_code
) 
 514         self
.to_screen('Extracted signature function:\n' + code
) 
 516     def _parse_sig_js(self
, jscode
): 
 517         funcname 
= self
._search
_regex
( 
 518             r
'\.sig\|\|([a-zA-Z0-9]+)\(', jscode
, 
 519              'Initial JS player signature function name') 
 521         jsi 
= JSInterpreter(jscode
) 
 522         initial_function 
= jsi
.extract_function(funcname
) 
 523         return lambda s
: initial_function([s
]) 
 525     def _parse_sig_swf(self
, file_contents
): 
 526         swfi 
= SWFInterpreter(file_contents
) 
 527         TARGET_CLASSNAME 
= 'SignatureDecipher' 
 528         searched_class 
= swfi
.extract_class(TARGET_CLASSNAME
) 
 529         initial_function 
= swfi
.extract_function(searched_class
, 'decipher') 
 530         return lambda s
: initial_function([s
]) 
 532     def _decrypt_signature(self
, s
, video_id
, player_url
, age_gate
=False): 
 533         """Turn the encrypted s field into a working signature""" 
 535         if player_url 
is None: 
 536             raise ExtractorError('Cannot decrypt signature without player_url') 
 538         if player_url
.startswith('//'): 
 539             player_url 
= 'https:' + player_url
 
 541             player_id 
= (player_url
, self
._signature
_cache
_id
(s
)) 
 542             if player_id 
not in self
._player
_cache
: 
 543                 func 
= self
._extract
_signature
_function
( 
 544                     video_id
, player_url
, s
 
 546                 self
._player
_cache
[player_id
] = func
 
 547             func 
= self
._player
_cache
[player_id
] 
 548             if self
._downloader
.params
.get('youtube_print_sig_code'): 
 549                 self
._print
_sig
_code
(func
, s
) 
 551         except Exception as e
: 
 552             tb 
= traceback
.format_exc() 
 553             raise ExtractorError( 
 554                 'Signature extraction failed: ' + tb
, cause
=e
) 
 556     def _get_available_subtitles(self
, video_id
, webpage
): 
 558             sub_list 
= self
._download
_webpage
( 
 559                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
, 
 560                 video_id
, note
=False) 
 561         except ExtractorError 
as err
: 
 562             self
._downloader
.report_warning('unable to download video subtitles: %s' % compat_str(err
)) 
 564         lang_list 
= re
.findall(r
'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list
) 
 569             if lang 
in sub_lang_list
: 
 571             params 
= compat_urllib_parse
.urlencode({ 
 574                 'fmt': self
._downloader
.params
.get('subtitlesformat', 'srt'), 
 575                 'name': unescapeHTML(l
[0]).encode('utf-8'), 
 577             url 
= 'https://www.youtube.com/api/timedtext?' + params
 
 578             sub_lang_list
[lang
] = url
 
 579         if not sub_lang_list
: 
 580             self
._downloader
.report_warning('video doesn\'t have subtitles') 
 584     def _get_available_automatic_caption(self
, video_id
, webpage
): 
 585         """We need the webpage for getting the captions url, pass it as an 
 586            argument to speed up the process.""" 
 587         sub_format 
= self
._downloader
.params
.get('subtitlesformat', 'srt') 
 588         self
.to_screen('%s: Looking for automatic captions' % video_id
) 
 589         mobj 
= re
.search(r
';ytplayer.config = ({.*?});', webpage
) 
 590         err_msg 
= 'Couldn\'t find automatic captions for %s' % video_id
 
 592             self
._downloader
.report_warning(err_msg
) 
 594         player_config 
= json
.loads(mobj
.group(1)) 
 596             args 
= player_config
[u
'args'] 
 597             caption_url 
= args
[u
'ttsurl'] 
 598             timestamp 
= args
[u
'timestamp'] 
 599             # We get the available subtitles 
 600             list_params 
= compat_urllib_parse
.urlencode({ 
 605             list_url 
= caption_url 
+ '&' + list_params
 
 606             caption_list 
= self
._download
_xml
(list_url
, video_id
) 
 607             original_lang_node 
= caption_list
.find('track') 
 608             if original_lang_node 
is None or original_lang_node
.attrib
.get('kind') != 'asr' : 
 609                 self
._downloader
.report_warning('Video doesn\'t have automatic captions') 
 611             original_lang 
= original_lang_node
.attrib
['lang_code'] 
 614             for lang_node 
in caption_list
.findall('target'): 
 615                 sub_lang 
= lang_node
.attrib
['lang_code'] 
 616                 params 
= compat_urllib_parse
.urlencode({ 
 617                     'lang': original_lang
, 
 623                 sub_lang_list
[sub_lang
] = caption_url 
+ '&' + params
 
 625         # An extractor error can be raise by the download process if there are 
 626         # no automatic captions but there are subtitles 
 627         except (KeyError, ExtractorError
): 
 628             self
._downloader
.report_warning(err_msg
) 
 632     def extract_id(cls
, url
): 
 633         mobj 
= re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
) 
 635             raise ExtractorError('Invalid URL: %s' % url
) 
 636         video_id 
= mobj
.group(2) 
 639     def _extract_from_m3u8(self
, manifest_url
, video_id
): 
 641         def _get_urls(_manifest
): 
 642             lines 
= _manifest
.split('\n') 
 643             urls 
= filter(lambda l
: l 
and not l
.startswith('#'), 
 646         manifest 
= self
._download
_webpage
(manifest_url
, video_id
, 'Downloading formats manifest') 
 647         formats_urls 
= _get_urls(manifest
) 
 648         for format_url 
in formats_urls
: 
 649             itag 
= self
._search
_regex
(r
'itag/(\d+?)/', format_url
, 'itag') 
 650             url_map
[itag
] = format_url
 
 653     def _extract_annotations(self
, video_id
): 
 654         url 
= 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
 
 655         return self
._download
_webpage
(url
, video_id
, note
='Searching for annotations.', errnote
='Unable to download video annotations.') 
 657     def _real_extract(self
, url
): 
 659             'http' if self
._downloader
.params
.get('prefer_insecure', False) 
 662         # Extract original video URL from URL with redirection, like age verification, using next_url parameter 
 663         mobj 
= re
.search(self
._NEXT
_URL
_RE
, url
) 
 665             url 
= proto 
+ '://www.youtube.com/' + compat_urllib_parse
.unquote(mobj
.group(1)).lstrip('/') 
 666         video_id 
= self
.extract_id(url
) 
 669         url 
= proto 
+ '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
 
 671             c 
for c 
in self
._downloader
.cookiejar
 
 672             if c
.domain 
== '.youtube.com' and c
.name 
== 'PREF'] 
 673         for pc 
in pref_cookies
: 
 674             if 'hl=' in pc
.value
: 
 675                 pc
.value 
= re
.sub(r
'hl=[^&]+', 'hl=en', pc
.value
) 
 680         video_webpage 
= self
._download
_webpage
(url
, video_id
) 
 682         # Attempt to extract SWF player URL 
 683         mobj 
= re
.search(r
'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
 685             player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
 690         self
.report_video_info_webpage_download(video_id
) 
 691         if re
.search(r
'player-age-gate-content">', video_webpage
) is not None: 
 693             # We simulate the access to the video from www.youtube.com/v/{video_id} 
 694             # this can be viewed without login into Youtube 
 695             data 
= compat_urllib_parse
.urlencode({ 
 696                 'video_id': video_id
, 
 697                 'eurl': 'https://youtube.googleapis.com/v/' + video_id
, 
 698                 'sts': self
._search
_regex
( 
 699                     r
'"sts"\s*:\s*(\d+)', video_webpage
, 'sts', default
=''), 
 701             video_info_url 
= proto 
+ '://www.youtube.com/get_video_info?' + data
 
 702             video_info_webpage 
= self
._download
_webpage
( 
 703                 video_info_url
, video_id
, 
 704                 note
='Refetching age-gated info webpage', 
 705                 errnote
='unable to download video info webpage') 
 706             video_info 
= compat_parse_qs(video_info_webpage
) 
 709             for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
 710                 video_info_url 
= (proto 
+ '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
 711                         % (video_id
, el_type
)) 
 712                 video_info_webpage 
= self
._download
_webpage
(video_info_url
, video_id
, 
 714                                         errnote
='unable to download video info webpage') 
 715                 video_info 
= compat_parse_qs(video_info_webpage
) 
 716                 if 'token' in video_info
: 
 718         if 'token' not in video_info
: 
 719             if 'reason' in video_info
: 
 720                 raise ExtractorError( 
 721                     'YouTube said: %s' % video_info
['reason'][0], 
 722                     expected
=True, video_id
=video_id
) 
 724                 raise ExtractorError( 
 725                     '"token" parameter not in video info for unknown reason', 
 728         if 'view_count' in video_info
: 
 729             view_count 
= int(video_info
['view_count'][0]) 
 733         # Check for "rental" videos 
 734         if 'ypc_video_rental_bar_text' in video_info 
and 'author' not in video_info
: 
 735             raise ExtractorError('"rental" videos not supported') 
 737         # Start extracting information 
 738         self
.report_information_extraction(video_id
) 
 741         if 'author' not in video_info
: 
 742             raise ExtractorError('Unable to extract uploader name') 
 743         video_uploader 
= compat_urllib_parse
.unquote_plus(video_info
['author'][0]) 
 746         video_uploader_id 
= None 
 747         mobj 
= re
.search(r
'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage
) 
 749             video_uploader_id 
= mobj
.group(1) 
 751             self
._downloader
.report_warning('unable to extract uploader nickname') 
 754         if 'title' in video_info
: 
 755             video_title 
= video_info
['title'][0] 
 757             self
._downloader
.report_warning('Unable to extract video title') 
 761         # We try first to get a high quality image: 
 762         m_thumb 
= re
.search(r
'<span itemprop="thumbnail".*?href="(.*?)">', 
 763                             video_webpage
, re
.DOTALL
) 
 764         if m_thumb 
is not None: 
 765             video_thumbnail 
= m_thumb
.group(1) 
 766         elif 'thumbnail_url' not in video_info
: 
 767             self
._downloader
.report_warning('unable to extract video thumbnail') 
 768             video_thumbnail 
= None 
 769         else:   # don't panic if we can't find it 
 770             video_thumbnail 
= compat_urllib_parse
.unquote_plus(video_info
['thumbnail_url'][0]) 
 774         mobj 
= re
.search(r
'(?s)id="eow-date.*?>(.*?)</span>', video_webpage
) 
 777                 r
'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>', 
 780             upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
 781             upload_date 
= unified_strdate(upload_date
) 
 783         m_cat_container 
= self
._search
_regex
( 
 784             r
'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', 
 785             video_webpage
, 'categories', fatal
=False) 
 787             category 
= self
._html
_search
_regex
( 
 788                 r
'(?s)<a[^<]+>(.*?)</a>', m_cat_container
, 'category', 
 790             video_categories 
= None if category 
is None else [category
] 
 792             video_categories 
= None 
 795         video_description 
= get_element_by_id("eow-description", video_webpage
) 
 796         if video_description
: 
 797             video_description 
= re
.sub(r
'''(?x) 
 799                     (?:[a-zA-Z-]+="[^"]+"\s+)*? 
 801                     (?:[a-zA-Z-]+="[^"]+"\s+)*? 
 802                     class="yt-uix-redirect-link"\s*> 
 805             ''', r
'\1', video_description
) 
 806             video_description 
= clean_html(video_description
) 
 808             fd_mobj 
= re
.search(r
'<meta name="description" content="([^"]+)"', video_webpage
) 
 810                 video_description 
= unescapeHTML(fd_mobj
.group(1)) 
 812                 video_description 
= '' 
 814         def _extract_count(count_name
): 
 815             count 
= self
._search
_regex
( 
 816                 r
'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re
.escape(count_name
), 
 817                 video_webpage
, count_name
, default
=None) 
 818             if count 
is not None: 
 819                 return int(count
.replace(',', '')) 
 821         like_count 
= _extract_count('like') 
 822         dislike_count 
= _extract_count('dislike') 
 825         video_subtitles 
= self
.extract_subtitles(video_id
, video_webpage
) 
 827         if self
._downloader
.params
.get('listsubtitles', False): 
 828             self
._list
_available
_subtitles
(video_id
, video_webpage
) 
 831         if 'length_seconds' not in video_info
: 
 832             self
._downloader
.report_warning('unable to extract video duration') 
 833             video_duration 
= None 
 835             video_duration 
= int(compat_urllib_parse
.unquote_plus(video_info
['length_seconds'][0])) 
 838         video_annotations 
= None 
 839         if self
._downloader
.params
.get('writeannotations', False): 
 840                 video_annotations 
= self
._extract
_annotations
(video_id
) 
 842         # Decide which formats to download 
 844             mobj 
= re
.search(r
';ytplayer\.config\s*=\s*({.*?});', video_webpage
) 
 846                 raise ValueError('Could not find vevo ID') 
 847             json_code 
= uppercase_escape(mobj
.group(1)) 
 848             ytplayer_config 
= json
.loads(json_code
) 
 849             args 
= ytplayer_config
['args'] 
 850             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map 
 851             # this signatures are encrypted 
 852             if 'url_encoded_fmt_stream_map' not in args
: 
 853                 raise ValueError('No stream_map present')  # caught below 
 854             re_signature 
= re
.compile(r
'[&,]s=') 
 855             m_s 
= re_signature
.search(args
['url_encoded_fmt_stream_map']) 
 857                 self
.to_screen('%s: Encrypted signatures detected.' % video_id
) 
 858                 video_info
['url_encoded_fmt_stream_map'] = [args
['url_encoded_fmt_stream_map']] 
 859             m_s 
= re_signature
.search(args
.get('adaptive_fmts', '')) 
 861                 if 'adaptive_fmts' in video_info
: 
 862                     video_info
['adaptive_fmts'][0] += ',' + args
['adaptive_fmts'] 
 864                     video_info
['adaptive_fmts'] = [args
['adaptive_fmts']] 
 868         def _map_to_format_list(urlmap
): 
 870             for itag
, video_real_url 
in urlmap
.items(): 
 873                     'url': video_real_url
, 
 874                     'player_url': player_url
, 
 876                 if itag 
in self
._formats
: 
 877                     dct
.update(self
._formats
[itag
]) 
 881         if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
 882             self
.report_rtmp_download() 
 884                 'format_id': '_rtmp', 
 886                 'url': video_info
['conn'][0], 
 887                 'player_url': player_url
, 
 889         elif len(video_info
.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info
.get('adaptive_fmts', [])) >= 1: 
 890             encoded_url_map 
= video_info
.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info
.get('adaptive_fmts',[''])[0] 
 891             if 'rtmpe%3Dyes' in encoded_url_map
: 
 892                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected
=True) 
 894             for url_data_str 
in encoded_url_map
.split(','): 
 895                 url_data 
= compat_parse_qs(url_data_str
) 
 896                 if 'itag' not in url_data 
or 'url' not in url_data
: 
 898                 format_id 
= url_data
['itag'][0] 
 899                 url 
= url_data
['url'][0] 
 901                 if 'sig' in url_data
: 
 902                     url 
+= '&signature=' + url_data
['sig'][0] 
 903                 elif 's' in url_data
: 
 904                     encrypted_sig 
= url_data
['s'][0] 
 907                         jsplayer_url_json 
= self
._search
_regex
( 
 908                             r
'"assets":.+?"js":\s*("[^"]+")', 
 909                             video_webpage
, 'JS player URL') 
 910                         player_url 
= json
.loads(jsplayer_url_json
) 
 911                     if player_url 
is None: 
 912                         player_url_json 
= self
._search
_regex
( 
 913                             r
'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', 
 914                             video_webpage
, 'age gate player URL') 
 915                         player_url 
= json
.loads(player_url_json
) 
 917                     if self
._downloader
.params
.get('verbose'): 
 918                         if player_url 
is None: 
 919                             player_version 
= 'unknown' 
 920                             player_desc 
= 'unknown' 
 922                             if player_url
.endswith('swf'): 
 923                                 player_version 
= self
._search
_regex
( 
 924                                     r
'-(.+?)(?:/watch_as3)?\.swf$', player_url
, 
 925                                     'flash player', fatal
=False) 
 926                                 player_desc 
= 'flash player %s' % player_version
 
 928                                 player_version 
= self
._search
_regex
( 
 929                                     r
'html5player-([^/]+?)(?:/html5player)?\.js', 
 931                                     'html5 player', fatal
=False) 
 932                                 player_desc 
= 'html5 player %s' % player_version
 
 934                         parts_sizes 
= self
._signature
_cache
_id
(encrypted_sig
) 
 935                         self
.to_screen('{%s} signature length %s, %s' % 
 936                             (format_id
, parts_sizes
, player_desc
)) 
 938                     signature 
= self
._decrypt
_signature
( 
 939                         encrypted_sig
, video_id
, player_url
, age_gate
) 
 940                     url 
+= '&signature=' + signature
 
 941                 if 'ratebypass' not in url
: 
 942                     url 
+= '&ratebypass=yes' 
 943                 url_map
[format_id
] = url
 
 944             formats 
= _map_to_format_list(url_map
) 
 945         elif video_info
.get('hlsvp'): 
 946             manifest_url 
= video_info
['hlsvp'][0] 
 947             url_map 
= self
._extract
_from
_m
3u8(manifest_url
, video_id
) 
 948             formats 
= _map_to_format_list(url_map
) 
 950             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') 
 952         # Look for the DASH manifest 
 953         if self
._downloader
.params
.get('youtube_include_dash_manifest', True): 
 955                 # The DASH manifest used needs to be the one from the original video_webpage. 
 956                 # The one found in get_video_info seems to be using different signatures. 
 957                 # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. 
 958                 # Luckily, it seems, this case uses some kind of default signature (len == 86), so the 
 959                 # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. 
 961                     dash_manifest_url 
= video_info
.get('dashmpd')[0] 
 963                     dash_manifest_url 
= ytplayer_config
['args']['dashmpd'] 
 964                 def decrypt_sig(mobj
): 
 966                     dec_s 
= self
._decrypt
_signature
(s
, video_id
, player_url
, age_gate
) 
 967                     return '/signature/%s' % dec_s
 
 968                 dash_manifest_url 
= re
.sub(r
'/s/([\w\.]+)', decrypt_sig
, dash_manifest_url
) 
 969                 dash_doc 
= self
._download
_xml
( 
 970                     dash_manifest_url
, video_id
, 
 971                     note
='Downloading DASH manifest', 
 972                     errnote
='Could not download DASH manifest') 
 973                 for r 
in dash_doc
.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): 
 974                     url_el 
= r
.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') 
 977                     format_id 
= r
.attrib
['id'] 
 978                     video_url 
= url_el
.text
 
 979                     filesize 
= int_or_none(url_el
.attrib
.get('{http://youtube.com/yt/2012/10/10}contentLength')) 
 981                         'format_id': format_id
, 
 983                         'width': int_or_none(r
.attrib
.get('width')), 
 984                         'tbr': int_or_none(r
.attrib
.get('bandwidth'), 1000), 
 985                         'asr': int_or_none(r
.attrib
.get('audioSamplingRate')), 
 986                         'filesize': filesize
, 
 989                         existing_format 
= next( 
 991                             if fo
['format_id'] == format_id
) 
 992                     except StopIteration: 
 993                         f
.update(self
._formats
.get(format_id
, {})) 
 996                         existing_format
.update(f
) 
 998             except (ExtractorError
, KeyError) as e
: 
 999                 self
.report_warning('Skipping DASH manifest: %r' % e
, video_id
) 
1001         self
._sort
_formats
(formats
) 
1005             'uploader':     video_uploader
, 
1006             'uploader_id':  video_uploader_id
, 
1007             'upload_date':  upload_date
, 
1008             'title':        video_title
, 
1009             'thumbnail':    video_thumbnail
, 
1010             'description':  video_description
, 
1011             'categories':   video_categories
, 
1012             'subtitles':    video_subtitles
, 
1013             'duration':     video_duration
, 
1014             'age_limit':    18 if age_gate 
else 0, 
1015             'annotations':  video_annotations
, 
1016             'webpage_url': proto 
+ '://www.youtube.com/watch?v=%s' % video_id
, 
1017             'view_count':   view_count
, 
1018             'like_count': like_count
, 
1019             'dislike_count': dislike_count
, 
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
        }
    }]

    def _real_initialize(self):
        self._login()

    def _ids_to_results(self, ids):
        # Wrap plain video ids into url_result dicts handled by YoutubeIE.
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _extract_mix(self, playlist_id):
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)
        if playlist_id.startswith('TL'):
            raise ExtractorError('For downloading YouTube.com top lists, use '
                'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)

        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        more_widget_html = content_html = page

        # Check if the playlist exists or is private
        if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
            raise ExtractorError(
                'The playlist doesn\'t exist or is private, use --username or '
                '--netrc to access it.',
                expected=True)

        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            matches = re.finditer(self._VIDEO_RE, content_html)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
class YoutubeTopListIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:toplist'
    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        ' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
    _TESTS = [{
        'url': 'yttoplist:music:Trending',
        'playlist_mincount': 5,
        'skip': 'Only works for logged-in users',
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')
        query = compat_urllib_parse.urlencode({'title': title})
        channel_page = self._download_webpage(
            'https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(
            r'''(?x)
                <a\s+href="([^"]+)".*?>\s*
                <span\s+class="branded-page-module-title-text">\s*
                <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
            channel_page, 'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        # sometimes the webpage doesn't contain the videos
        # retry until we get them
        for i in itertools.count(0):
            msg = 'Downloading Youtube mix'
            if i > 0:
                msg += ', retry #%d' % i

            webpage = self._download_webpage(url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_title=title)
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
    }]

    def extract_videos_from_page(self, page):
        # Collect unique watch-page video ids in first-seen order.
        ids_in_page = []
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        return ids_in_page

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            video_ids = self.extract_videos_from_page(channel_page)
        else:
            # Download all channel pages using the json-based channel_ajax query
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_json(
                    url, channel_id, note='Downloading page #%s' % pagenum,
                    transform_source=uppercase_escape)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                    break

        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
                       for video_id in video_ids]
        return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = 'youtube:user'
    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors: this regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        def download_page(pagenum):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(
                gdata_url, username,
                'Downloading video ids from %d to %d' % (
                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                return

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': video_id,
                    'title': title,
                }
        url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        video_ids = []
        pagenum = 0
        limit = n
        PAGE_SIZE = 50

        while (PAGE_SIZE * pagenum) < limit:
            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % (pagenum + 1),
                errnote='Unable to download API page')
            data = json.loads(data_json)
            api_response = data['data']

            if 'items' not in api_response:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that returns the newest videos first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Same gdata endpoint as the parent, with orderby=published appended.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
class YoutubeSearchURLIE(InfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(mobj.group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')

        part_codes = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
        entries = []
        for part_code in part_codes:
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            part_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet)
            entries.append({
                '_type': 'url',
                'url': part_url,
                'title': part_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
class YoutubeShowIE(InfoExtractor):
    """Extract a YouTube show page as a playlist of its season playlists."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TEST = {
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
        # Delegate each season to YoutubePlaylistIE via url_result.
        entries = [
            self.url_result(
                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in m_seasons]
        # Show title may be absent; fatal=False tolerates that.
        title = self._og_search_title(webpage, fatal=False)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': title,
            'entries': entries,
        }
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are tied to an account, so credentials are mandatory.
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # Builds the feed_ajax URL; one %s placeholder is left for the
        # paging token ('%%s' survives the first interpolation).
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          '%s feed' % self._FEED_NAME,
                                          'Downloading page %s' % i)
            # The JSON payload carries 'feed_html' or 'content_html'
            # depending on the feed type; accept either.
            feed_html = info.get('feed_html') or info.get('content_html')
            load_more_widget_html = info.get('load_more_widget_html') or feed_html
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # orderedSet: de-duplicate IDs while keeping first-seen order.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
                self.url_result(video_id, 'Youtube', video_id=video_id)
                for video_id in ids)
            mobj = re.search(
                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
                load_more_widget_html)
            if mobj is None:
                # No "load more" button -> last page reached.
                break
            paging = mobj.group('paging')
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Recommended-videos feed, reachable as ":ytrec" / ":ytrecommended".

    All extraction logic lives in YoutubeFeedsInfoExtractor; this class
    only selects the feed and names the resulting playlist.
    """
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Watch-later feed, reachable as ":ytwatchlater".

    Extraction is inherited from YoutubeFeedsInfoExtractor; this class
    only configures which feed is fetched.
    """
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = 'Youtube Watch Later'
    # The watch-later list is account-specific, so request the personal feed.
    _PERSONAL_FEED = True
    IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed, reachable as ":ythistory".

    Extraction is inherited from YoutubeFeedsInfoExtractor; this class
    only configures which feed is fetched.
    """
    IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Fixed: _VALID_URL was a plain string, so '\.' was an invalid escape
    # sequence (DeprecationWarning today, SyntaxError in future Pythons).
    # Made it a raw string, consistent with every sibling feed extractor.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    # Watch history is account-specific, so request the personal feed.
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = 'Youtube Watch History'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Favourites are only visible to the account owner.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds a link to an ordinary playlist; pull out
        # its list id and delegate the real work to YoutubePlaylistIE.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeSubscriptionsIE(YoutubePlaylistIE):
    """Extract the subscriptions feed as a playlist.

    Subclasses YoutubePlaylistIE so the login machinery is inherited.
    """
    IE_NAME = 'youtube:subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _TESTS = []

    def _real_extract(self, url):
        title = 'Youtube Subscriptions'
        page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page

        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
            # orderedSet: de-duplicate within a page, keeping first-seen order.
            new_ids = orderedSet(matches)
            ids.extend(new_ids)

            # Stop when the current page has no "load more" button.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), title,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return {
            '_type': 'playlist',
            'title': title,
            'entries': self._ids_to_results(ids),
        }
1617 class YoutubeTruncatedURLIE(InfoExtractor
): 
1618     IE_NAME 
= 'youtube:truncated_url' 
1619     IE_DESC 
= False  # Do not list 
1620     _VALID_URL 
= r
'''(?x) 
1621         (?:https?://)?[^/]+/watch\?(?: 
1623             annotation_id=annotation_[^&]+ 
1625         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ 
1629         'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', 
1630         'only_matching': True, 
1632         'url': 'http://www.youtube.com/watch?', 
1633         'only_matching': True, 
1636     def _real_extract(self
, url
): 
1637         raise ExtractorError( 
1638             'Did you forget to quote the URL? Remember that & is a meta ' 
1639             'character in most shells, so you want to put the URL in quotes, ' 
1641             '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' 
1642             ' or simply  youtube-dl BaW_jenozKc  .',