from __future__ import unicode_literals

import itertools
import json
import os.path
import re
import time
import traceback

from .common import InfoExtractor, SearchInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
    compat_chr,
    compat_parse_qs,
    compat_str,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
)
from ..utils import (
    clean_html,
    ExtractorError,
    float_or_none,
    get_element_by_attribute,
    get_element_by_id,
    int_or_none,
    orderedSet,
    unescapeHTML,
    unified_strdate,
    uppercase_escape,
)


class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return False

        galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                  login_page, 'Login GALX parameter')

        login_form_strs = {
            'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
            'Email': username,
            'GALX': galx,
            'Passwd': password,
            'PersistentCookie': 'yes',
            'bgresponse': 'js_disabled',
            'checkConnection': '',
            'checkedDomains': 'youtube',
            'service': 'youtube',
        }

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = self._download_webpage(
            req, None,
            note='Logging in', errnote='unable to log in', fatal=False)
        if login_results is False:
            return False

        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # TODO add SMS and phone call support - these require making a request and then prompting the user
        if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
            tfa_code = self._get_tfa_info()

            if tfa_code is None:
                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # Unlike the first login form, secTok and timeStmp are both required for the TFA form

            match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning('Failed to get secTok - did the page structure change?')
            secTok = match.group(1)
            match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
            timeStmp = match.group(1)

            tfa_form_strs = {
                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                'smsUserPin': tfa_code,
                'smsVerifyPin': 'Verify',
                'PersistentCookie': 'yes',
                'checkConnection': '',
                'checkedDomains': 'youtube',
                'secTok': secTok,
                'timeStmp': timeStmp,
                'service': 'youtube',
            }

            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
            tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')

            tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return


class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com'
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
                            (?:www\.)?pwnyoutube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                     (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
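
    # Illustrative note (not part of the original source): _NEXT_URL_RE pulls the
    # "next_url" query parameter out of redirect URLs such as
    # https://www.youtube.com/verify_age?next_url=%2Fwatch%3Fv%3D<video_id>
    # (hypothetical example), so _real_extract() can recover the original /watch URL.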
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240},
        '6': {'ext': 'flv', 'width': 450, 'height': 270},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
        '34': {'ext': 'flv', 'width': 640, 'height': 360},
        '35': {'ext': 'flv', 'width': 854, 'height': 480},
        '36': {'ext': '3gp', 'width': 320, 'height': 240},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
        '43': {'ext': 'webm', 'width': 640, 'height': 360},
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},

        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},

        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},

        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},

        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
        '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
        '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},

        '_rtmp': {'protocol': 'rtmp'},
    }
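
    # Illustrative note (not part of the original source): _formats is keyed by
    # YouTube's numeric "itag". _map_to_format_list() inside _real_extract() and
    # _parse_dash_manifest() merge these entries into the format dicts they build,
    # e.g. itag '22' contributes {'ext': 'mp4', 'width': 1280, 'height': 720}.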
    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
            'info_dict': {
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'upload_date': '20121002',
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'dislike_count': int,
            },
        },
        {
            'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
            'note': 'Test generic use_cipher_signature video (#897)',
            'info_dict': {
                'upload_date': '20120506',
                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
                'uploader': 'Icona Pop',
                'uploader_id': 'IconaPop',
            },
        },
        {
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'upload_date': '20130703',
                'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
                'description': 'md5:64249768eec3bc4276236606ea996373',
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
            },
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia'
            },
        },
        {
            'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
            },
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
                'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
            },
            'params': {
                'youtube_include_dash_manifest': True,
            },
        },
        # JS player signature function name containing $
        {
            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
            'info_dict': {
                'title': 'Taylor Swift - Shake It Off',
                'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
                'uploader': 'TaylorSwiftVEVO',
                'uploader_id': 'TaylorSwiftVEVO',
                'upload_date': '20140818',
            },
            'params': {
                'youtube_include_dash_manifest': True,
            },
        },
        {
            'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
            'info_dict': {
                'upload_date': '20100909',
                'uploader': 'The Amazing Atheist',
                'uploader_id': 'TheAmazingAtheist',
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            },
        },
        # Normal age-gate video (No vevo, embed allowed)
        {
            'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'upload_date': '20140605',
            },
        },
        # Age-gate video with encrypted signature
        {
            'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
            'info_dict': {
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
                'upload_date': '20110629',
            },
        },
        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'description': 'md5:12c56784b8032162bb936a5f76d55360',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ],
        },
        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'upload_date': '20120731',
                'uploader_id': 'olympic',
                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympics',
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
            },
        },
        {
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
            'info_dict': {
                'stretched_ratio': 16 / 9.,
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
            },
        },
        # url_encoded_fmt_stream_map is empty string
        {
            'url': 'qEJwOuvDf7I',
            'info_dict': {
                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
                'upload_date': '20150404',
                'uploader_id': 'spbelect',
                'uploader': 'Наблюдатели Петербурга',
            },
            'params': {
                'skip_download': 'requires avconv',
            },
        },
    ]

    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        self._player_cache = {}

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen('%s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen('%s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        self.to_screen('%s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen('RTMP download detected')

    def _signature_cache_id(self, example_sig):
        """ Return a string representation of a signature """
        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
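
    # Illustrative example (not part of the original source): for an encrypted
    # signature shaped like 'AAAAAAAA.BBBB.CCCCCCCCCCCC' the cache id is the
    # dotted list of part lengths, '8.4.12'; signatures with the same shape can
    # then reuse one cached deciphering spec.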
    def _extract_signature_function(self, video_id, player_url, example_sig):
        id_m = re.match(
            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
            player_url)
        if not id_m:
            raise ExtractorError('Cannot identify player %r' % player_url)
        player_type = id_m.group('ext')
        player_id = id_m.group('id')

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res

    def _print_sig_code(self, func, example_sig):
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)

    def _parse_sig_js(self, jscode):
        funcname = self._search_regex(
            r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
            'Initial JS player signature function name')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])

    def _parse_sig_swf(self, file_contents):
        swfi = SWFInterpreter(file_contents)
        TARGET_CLASSNAME = 'SignatureDecipher'
        searched_class = swfi.extract_class(TARGET_CLASSNAME)
        initial_function = swfi.extract_function(searched_class, 'decipher')
        return lambda s: initial_function([s])

    def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
        """Turn the encrypted s field into a working signature"""

        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        try:
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
                self._print_sig_code(func, s)
            return func(s)
        except Exception as e:
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)

    def _get_subtitles(self, video_id, webpage):
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            if lang in sub_lang_list:
                continue
            sub_formats = []
            for ext in ['sbv', 'vtt', 'srt']:
                params = compat_urllib_parse.urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                })
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list

    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if mobj is None:
            self._downloader.report_warning(err_msg)
            return {}
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config['args']
            caption_url = args['ttsurl']
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse.urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in ['sbv', 'vtt', 'srt']:
                    params = compat_urllib_parse.urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}

    @classmethod
    def extract_id(cls, url):
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        video_id = mobj.group(2)
        return video_id
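
    # Illustrative example (not part of the original source):
    #   YoutubeIE.extract_id('https://www.youtube.com/watch?v=BaW_jenozKc') == 'BaW_jenozKc'
    # Group 2 of _VALID_URL is the bare 11-character video ID, so naked IDs are
    # accepted as well (see the '__2ABJjxzNo' and 'lqQg6PlCWgI' tests above).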

    def _extract_from_m3u8(self, manifest_url, video_id):
        url_map = {}

        def _get_urls(_manifest):
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
                          lines)
            return urls
        manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
        return url_map

    def _extract_annotations(self, video_id):
        url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
        return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')

    def _parse_dash_manifest(
            self, video_id, dash_manifest_url, player_url, age_gate):
        def decrypt_sig(mobj):
            s = mobj.group(1)
            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
            return '/signature/%s' % dec_s
        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
        dash_doc = self._download_xml(
            dash_manifest_url, video_id,
            note='Downloading DASH manifest',
            errnote='Could not download DASH manifest')

        formats = []
        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
            mime_type = a.attrib.get('mimeType')
            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
                if url_el is None:
                    continue
                if mime_type == 'text/vtt':
                    # TODO implement WebVTT downloading
                    pass
                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
                    format_id = r.attrib['id']
                    video_url = url_el.text
                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
                    f = {
                        'format_id': format_id,
                        'url': video_url,
                        'width': int_or_none(r.attrib.get('width')),
                        'height': int_or_none(r.attrib.get('height')),
                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                        'filesize': filesize,
                        'fps': int_or_none(r.attrib.get('frameRate')),
                    }
                    try:
                        existing_format = next(
                            fo for fo in formats
                            if fo['format_id'] == format_id)
                    except StopIteration:
                        full_info = self._formats.get(format_id, {}).copy()
                        full_info.update(f)
                        formats.append(full_info)
                    else:
                        existing_format.update(f)
                else:
                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
        return formats
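
    # Illustrative note (not part of the original source): a DASH <Representation>
    # element such as <Representation id="137" bandwidth="4487500" width="1920"
    # height="1080" frameRate="24"> (hypothetical attribute values) becomes a
    # format dict whose 'tbr', 'width', 'height' and 'fps' come from those
    # attributes, with the remaining fields filled in from self._formats['137'].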

    def _real_extract(self, url):
        proto = (
            'http' if self._downloader.params.get('prefer_insecure', False)
            else 'https')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self.extract_id(url)

        # Get video webpage
        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        embed_webpage = None
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            age_gate = True
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            url = proto + '://www.youtube.com/embed/%s' % video_id
            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
            data = compat_urllib_parse.urlencode({
                'video_id': video_id,
                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                'sts': self._search_regex(
                    r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
            })
            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(
                video_info_url, video_id,
                note='Refetching age-gated info webpage',
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
        else:
            age_gate = False
            try:
                # Try looking directly into the video webpage
                mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
                if not mobj:
                    raise ValueError('Could not find ytplayer.config')  # caught below
                json_code = uppercase_escape(mobj.group(1))
                ytplayer_config = json.loads(json_code)
                args = ytplayer_config['args']
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                if not args.get('url_encoded_fmt_stream_map'):
                    raise ValueError('No stream_map present')  # caught below
            except ValueError:
                # We fallback to the get_video_info pages (used by the embed page)
                self.report_video_info_webpage_download(video_id)
                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                    video_info_url = (
                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (proto, video_id, el_type))
                    video_info_webpage = self._download_webpage(
                        video_info_url,
                        video_id, note=False,
                        errnote='unable to download video info webpage')
                    video_info = compat_parse_qs(video_info_webpage)
                    if 'token' in video_info:
                        break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(
                    'YouTube said: %s' % video_info['reason'][0],
                    expected=True, video_id=video_id)
            else:
                raise ExtractorError(
                    '"token" parameter not in video info for unknown reason',
                    video_id=video_id)

        if 'view_count' in video_info:
            view_count = int(video_info['view_count'][0])
        else:
            view_count = None

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError('"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            raise ExtractorError('Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # uploader_id
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        if mobj is not None:
            video_uploader_id = mobj.group(1)
        else:
            self._downloader.report_warning('unable to extract uploader nickname')

        # title
        if 'title' in video_info:
            video_title = video_info['title'][0]
        else:
            self._downloader.report_warning('Unable to extract video title')
            video_title = '_'

        # thumbnail image
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning('unable to extract video thumbnail')
            video_thumbnail = None
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = None
        mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
        if mobj is None:
            mobj = re.search(
                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
                video_webpage)
        if mobj is not None:
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        m_cat_container = self._search_regex(
            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
            video_webpage, 'categories', default=None)
        if m_cat_container:
            category = self._html_search_regex(
                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
                default=None)
            video_categories = None if category is None else [category]
        else:
            video_categories = None

        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    title="([^"]+)"\s+
                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
                    class="yt-uix-redirect-link"\s*>
                [^<]+
                </a>
            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = ''

        def _extract_count(count_name):
            count = self._search_regex(
                r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
                video_webpage, count_name, default=None)
            if count is not None:
                return int(count.replace(',', ''))
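
        # Illustrative example (not part of the original source): given webpage
        # markup along the lines of '<span id="watch-like" ...>... 1,057</span>'
        # (hypothetical), _extract_count('like') returns 1057; the thousands
        # separator is stripped before the int() conversion.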
        like_count = _extract_count('like')
        dislike_count = _extract_count('dislike')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)
        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

        if 'length_seconds' not in video_info:
            self._downloader.report_warning('unable to extract video duration')
            video_duration = None
        else:
            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))

        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)

        def _map_to_format_list(urlmap):
            formats = []
            for itag, video_real_url in urlmap.items():
                dct = {
                    'format_id': itag,
                    'url': video_real_url,
                    'player_url': player_url,
                }
                if itag in self._formats:
                    dct.update(self._formats[itag])
                formats.append(dct)
            return formats

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
                'format_id': '_rtmp',
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' not in url_data or 'url' not in url_data:
                    continue
                format_id = url_data['itag'][0]
                url = url_data['url'][0]

                if 'sig' in url_data:
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    encrypted_sig = url_data['s'][0]
                    ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'

                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE,
                        embed_webpage if age_gate else video_webpage,
                        'JS player URL (1)', default=None)
                    if not jsplayer_url_json and not age_gate:
                        # We need the embed website after all
                        if embed_webpage is None:
                            embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                            embed_webpage = self._download_webpage(
                                embed_url, video_id, 'Downloading embed webpage')
                        jsplayer_url_json = self._search_regex(
                            ASSETS_RE, embed_webpage, 'JS player URL')

                    player_url = json.loads(jsplayer_url_json)
                    if player_url is None:
                        player_url_json = self._search_regex(
                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                            video_webpage, 'age gate player URL')
                        player_url = json.loads(player_url_json)

                    if self._downloader.params.get('verbose'):
                        if player_url is None:
                            player_version = 'unknown'
                            player_desc = 'unknown'
                        else:
                            if player_url.endswith('swf'):
                                player_version = self._search_regex(
                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                    'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                            else:
                                player_version = self._search_regex(
                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
                                    player_url,
                                    'html5 player', fatal=False)
                                player_desc = 'html5 player %s' % player_version

                        parts_sizes = self._signature_cache_id(encrypted_sig)
                        self.to_screen('{%s} signature length %s, %s' %
                                       (format_id, parts_sizes, player_desc))

                    signature = self._decrypt_signature(
                        encrypted_sig, video_id, player_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
                url_map[format_id] = url
            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            formats = _map_to_format_list(url_map)
        else:
            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

        # Look for the DASH manifest
        if self._downloader.params.get('youtube_include_dash_manifest', True):
            dash_mpd = video_info.get('dashmpd')
            if dash_mpd:
                dash_manifest_url = dash_mpd[0]
                try:
                    dash_formats = self._parse_dash_manifest(
                        video_id, dash_manifest_url, player_url, age_gate)
                except (ExtractorError, KeyError) as e:
                    self.report_warning(
                        'Skipping DASH manifest: %r' % e, video_id)
                else:
                    # Remove the formats we found through non-DASH, they
                    # contain less info and it can be wrong, because we use
                    # fixed values (for example the resolution). See
                    # https://github.com/rg3/youtube-dl/issues/5774 for an
                    # example.
                    dash_keys = set(df['format_id'] for df in dash_formats)
                    formats = [f for f in formats if f['format_id'] not in dash_keys]
                    formats.extend(dash_formats)

        # Check for malformed aspect ratio
        stretched_m = re.search(
            r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
            video_webpage)
        if stretched_m:
            ratio = float(stretched_m.group('w')) / float(stretched_m.group('h'))
            for f in formats:
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date': upload_date,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'categories': video_categories,
            'subtitles': video_subtitles,
            'automatic_captions': automatic_captions,
            'duration': video_duration,
            'age_limit': 18 if age_gate else 0,
            'annotations': video_annotations,
            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
            'formats': formats,
        }


class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        },
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        self._login()

    def _extract_mix(self, playlist_id):
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        more_widget_html = content_html = page
 
        for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        # Extract the video ids from the playlist pages
        ids = []
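        # Each further page is fetched through the "load more" AJAX endpoint,
        # which returns JSON carrying two HTML fragments: 'content_html' (the
        # newly loaded list items) and 'load_more_widget_html' (the next
        # "Load more" button, if any).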
        for page_num in itertools.count(1):
            matches = re.finditer(self._VIDEO_RE, content_html)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
            page, 'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)


class YoutubeChannelIE(InfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
    }]

    @staticmethod
    def extract_videos_from_page(page):
        ids_in_page = []
        titles_in_page = []
        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
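            # The same video id can appear several times on the page (e.g. a
            # thumbnail link and a title link); keep one entry per id and
            # prefer an occurrence that actually carries a title.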
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page)

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = self._TEMPLATE_URL % channel_id

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = self._search_regex(
            [r'<meta itemprop="channelId" content="([^"]+)">',
             r'data-channel-external-id="([^"]+)"'],
            channel_page, 'channel id', default=None)
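        # A channel id of the form 'UCxxxx' has a matching auto-generated
        # "uploads" playlist id 'UUxxxx' (same suffix), which can be walked as
        # a regular playlist and so avoids the paging cap described above.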
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        def _entries():
            more_widget_html = content_html = channel_page
            for pagenum in itertools.count(1):

                for video_id, video_title in self.extract_videos_from_page(content_html):
                    yield self.url_result(
                        video_id, 'Youtube', video_id=video_id,
                        video_title=video_title)

                mobj = re.search(
                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                    more_widget_html)
                if not mobj:
                    break

                more = self._download_json(
                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
                    'Downloading page #%s' % (pagenum + 1),
                    transform_source=uppercase_escape)
                content_html = more['content_html']
                more_widget_html = more['load_more_widget_html']

        return self.playlist_result(_entries(), channel_id)


class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'title': 'TheLinuxFoundation',
        },
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with other youtube
        # extractors: the regex is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)


class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
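    # The search key is used on the command line as e.g. "ytsearch3:<query>"
    # (first three results); with _MAX_RESULTS set to infinity,
    # "ytsearchall:<query>" is accepted as well.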

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

        videos = []
        limit = n

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query,
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            if not new_videos or len(videos) > limit:
                break

        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)


class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
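    # Illustrative usage: "ytsearchdate5:python tutorial" downloads the five
    # most recently uploaded matches, since the extra 'search_sort' parameter
    # asks YouTube to order results by upload date.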


class YoutubeSearchURLIE(InfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        },
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse.unquote_plus(mobj.group('query'))

        webpage = self._download_webpage(url, query)
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        part_codes = re.findall(
            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
        entries = []
        for part_code in part_codes:
            part_title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
            part_url_snippet = self._html_search_regex(
                r'(?s)href="([^"]+)"', part_code, 'item URL')
            part_url = compat_urlparse.urljoin(
                'https://www.youtube.com/', part_url_snippet)
            entries.append({
                '_type': 'url',
                'url': part_url,
                'title': part_title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }


class YoutubeShowIE(InfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'http://www.youtube.com/show/airdisasters',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        },
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
        entries = [
            self.url_result(
                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
            for season in m_seasons
        ]
        title = self._og_search_title(webpage, fatal=False)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': title,
            'entries': entries,
        }


class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME
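    # Each subclass therefore reports a distinct IE_NAME such as
    # 'youtube:recommended' or 'youtube:history', derived from its _FEED_NAME.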
 
    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            # (list() keeps the emptiness check working on Python 3, where
            # filter() returns a lazy iterator)
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)


class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    _TESTS = []  # override PlaylistIE tests
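    # 'WL' is the reserved playlist id YouTube uses for the signed-in user's
    # Watch Later list (see _VALID_URL above), so extraction simply reuses the
    # inherited playlist code path.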

    def _real_extract(self, url):
        return self._extract_playlist('WL')


class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')


class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'


class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'


class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'


class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''
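    # These URLs typically come from an unquoted command line: the shell
    # treats '&' as a control operator, so everything after the first '&'
    # (including the v= parameter) never reaches youtube-dl.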

    _TESTS = [{
        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .',
            expected=True)


class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),