3 from __future__
import unicode_literals
14 from .common
import InfoExtractor
, SearchInfoExtractor
15 from ..jsinterp
import JSInterpreter
16 from ..swfinterp
import SWFInterpreter
17 from ..compat
import (
21 compat_urllib_parse_unquote
,
22 compat_urllib_parse_unquote_plus
,
23 compat_urllib_parse_urlencode
,
24 compat_urllib_parse_urlparse
,
33 get_element_by_attribute
,
54 class YoutubeBaseInfoExtractor(InfoExtractor
):
55 """Provide base functions for Youtube extractors"""
56 _LOGIN_URL
= 'https://accounts.google.com/ServiceLogin'
57 _TWOFACTOR_URL
= 'https://accounts.google.com/signin/challenge'
59 _LOOKUP_URL
= 'https://accounts.google.com/_/signin/sl/lookup'
60 _CHALLENGE_URL
= 'https://accounts.google.com/_/signin/sl/challenge'
61 _TFA_URL
= 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
63 _NETRC_MACHINE
= 'youtube'
64 # If True it will raise an error if no login info is provided
65 _LOGIN_REQUIRED
= False
67 _PLAYLIST_ID_RE
= r
'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
69 def _set_language(self
):
71 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
72 # YouTube sets the expire time to about two months
73 expire_time
=time
.time() + 2 * 30 * 24 * 3600)
75 def _ids_to_results(self
, ids
):
77 self
.url_result(vid_id
, 'Youtube', video_id
=vid_id
)
82 Attempt to log in to YouTube.
83 True is returned if successful or skipped.
84 False is returned if login failed.
86 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
88 username
, password
= self
._get
_login
_info
()
89 # No authentication to be performed
91 if self
._LOGIN
_REQUIRED
and self
._downloader
.params
.get('cookiefile') is None:
92 raise ExtractorError('No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True)
95 login_page
= self
._download
_webpage
(
96 self
._LOGIN
_URL
, None,
97 note
='Downloading login page',
98 errnote
='unable to fetch login page', fatal
=False)
99 if login_page
is False:
102 login_form
= self
._hidden
_inputs
(login_page
)
104 def req(url
, f_req
, note
, errnote
):
105 data
= login_form
.copy()
108 'checkConnection': 'youtube',
109 'checkedDomains': 'youtube',
111 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
112 'f.req': json
.dumps(f_req
),
113 'flowName': 'GlifWebSignIn',
114 'flowEntry': 'ServiceLogin',
116 return self
._download
_json
(
117 url
, None, note
=note
, errnote
=errnote
,
118 transform_source
=lambda s
: re
.sub(r
'^[^[]*', '', s
),
120 data
=urlencode_postdata(data
), headers
={
121 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
122 'Google-Accounts-XSRF': 1,
126 self
._downloader
.report_warning(message
)
130 None, [], None, 'US', None, None, 2, False, True,
134 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
136 1, [None, None, []], None, None, None, True
141 lookup_results
= req(
142 self
._LOOKUP
_URL
, lookup_req
,
143 'Looking up account info', 'Unable to look up account info')
145 if lookup_results
is False:
148 user_hash
= try_get(lookup_results
, lambda x
: x
[0][2], compat_str
)
150 warn('Unable to extract user hash')
155 None, 1, None, [1, None, None, None, [password
, None, True]],
157 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
158 1, [None, None, []], None, None, None, True
161 challenge_results
= req(
162 self
._CHALLENGE
_URL
, challenge_req
,
163 'Logging in', 'Unable to log in')
165 if challenge_results
is False:
168 login_res
= try_get(challenge_results
, lambda x
: x
[0][5], list)
170 login_msg
= try_get(login_res
, lambda x
: x
[5], compat_str
)
172 'Unable to login: %s' % 'Invalid password'
173 if login_msg
== 'INCORRECT_ANSWER_ENTERED' else login_msg
)
176 res
= try_get(challenge_results
, lambda x
: x
[0][-1], list)
178 warn('Unable to extract result entry')
181 login_challenge
= try_get(res
, lambda x
: x
[0][0], list)
183 challenge_str
= try_get(login_challenge
, lambda x
: x
[2], compat_str
)
184 if challenge_str
== 'TWO_STEP_VERIFICATION':
185 # SEND_SUCCESS - TFA code has been successfully sent to phone
186 # QUOTA_EXCEEDED - reached the limit of TFA codes
187 status
= try_get(login_challenge
, lambda x
: x
[5], compat_str
)
188 if status
== 'QUOTA_EXCEEDED':
189 warn('Exceeded the limit of TFA codes, try later')
192 tl
= try_get(challenge_results
, lambda x
: x
[1][2], compat_str
)
194 warn('Unable to extract TL')
197 tfa_code
= self
._get
_tfa
_info
('2-step verification code')
201 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
202 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
205 tfa_code
= remove_start(tfa_code
, 'G-')
208 user_hash
, None, 2, None,
210 9, None, None, None, None, None, None, None,
211 [None, tfa_code
, True, 2]
215 self
._TFA
_URL
.format(tl
), tfa_req
,
216 'Submitting TFA code', 'Unable to submit TFA code')
218 if tfa_results
is False:
221 tfa_res
= try_get(tfa_results
, lambda x
: x
[0][5], list)
223 tfa_msg
= try_get(tfa_res
, lambda x
: x
[5], compat_str
)
225 'Unable to finish TFA: %s' % 'Invalid TFA code'
226 if tfa_msg
== 'INCORRECT_ANSWER_ENTERED' else tfa_msg
)
229 check_cookie_url
= try_get(
230 tfa_results
, lambda x
: x
[0][-1][2], compat_str
)
233 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
234 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
235 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
237 challenge
= CHALLENGES
.get(
239 '%s returned error %s.' % (self
.IE_NAME
, challenge_str
))
240 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge
)
243 check_cookie_url
= try_get(res
, lambda x
: x
[2], compat_str
)
245 if not check_cookie_url
:
246 warn('Unable to extract CheckCookie URL')
249 check_cookie_results
= self
._download
_webpage
(
250 check_cookie_url
, None, 'Checking cookie', fatal
=False)
252 if check_cookie_results
is False:
255 if 'https://myaccount.google.com/' not in check_cookie_results
:
256 warn('Unable to log in')
def _download_webpage_handle(self, *args, **kwargs):
    """Fetch a page while forcing YouTube's legacy (non-polymer) layout.

    Injects disable_polymer=true into the request's query parameters so
    the HTML matches what the extraction regexes expect, then delegates
    to the parent class implementation.
    """
    query = kwargs.setdefault('query', {})
    query['disable_polymer'] = 'true'
    parent = super(YoutubeBaseInfoExtractor, self)
    return parent._download_webpage_handle(*args, **compat_kwargs(kwargs))
266 def _real_initialize(self
):
267 if self
._downloader
is None:
270 if not self
._login
():
274 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor
):
275 # Extract entries from page with "Load more" button
276 def _entries(self
, page
, playlist_id
):
277 more_widget_html
= content_html
= page
278 for page_num
in itertools
.count(1):
279 for entry
in self
._process
_page
(content_html
):
282 mobj
= re
.search(r
'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html
)
286 more
= self
._download
_json
(
287 'https://youtube.com/%s' % mobj
.group('more'), playlist_id
,
288 'Downloading page #%s' % page_num
,
289 transform_source
=uppercase_escape
)
290 content_html
= more
['content_html']
291 if not content_html
.strip():
292 # Some webpages show a "Load more" button but they don't
295 more_widget_html
= more
['load_more_widget_html']
298 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor
):
def _process_page(self, content):
    """Yield a url_result entry for each (id, title) pair found on the page.

    Relies on the subclass-provided extract_videos_from_page() to locate
    the videos; each one is wrapped as a 'Youtube' url_result.
    """
    for vid, vid_title in self.extract_videos_from_page(content):
        yield self.url_result(vid, 'Youtube', vid, vid_title)
303 def extract_videos_from_page(self
, page
):
306 for mobj
in re
.finditer(self
._VIDEO
_RE
, page
):
307 # The link with index 0 is not the first video of the playlist (not sure if still actual)
308 if 'index' in mobj
.groupdict() and mobj
.group('id') == '0':
310 video_id
= mobj
.group('id')
311 video_title
= unescapeHTML(mobj
.group('title'))
313 video_title
= video_title
.strip()
315 idx
= ids_in_page
.index(video_id
)
316 if video_title
and not titles_in_page
[idx
]:
317 titles_in_page
[idx
] = video_title
319 ids_in_page
.append(video_id
)
320 titles_in_page
.append(video_title
)
321 return zip(ids_in_page
, titles_in_page
)
324 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor
):
def _process_page(self, content):
    """Yield a url_result for every playlist linked from the page.

    Playlist IDs are scraped from lockup-title anchors; orderedSet
    drops duplicates while preserving first-seen order.
    """
    playlist_link_re = r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"'
    for playlist_id in orderedSet(re.findall(playlist_link_re, content)):
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
def _real_extract(self, url):
    """Download a channel/user playlists-overview page and return a playlist result.

    The page title (og:title, best effort) becomes the playlist title;
    entries are produced lazily by the inherited _entries() paginator.
    """
    playlist_id = self._match_id(url)
    page = self._download_webpage(url, playlist_id)
    page_title = self._og_search_title(page, fatal=False)
    return self.playlist_result(
        self._entries(page, playlist_id), playlist_id, page_title)
339 class YoutubeIE(YoutubeBaseInfoExtractor
):
340 IE_DESC
= 'YouTube.com'
341 _VALID_URL
= r
"""(?x)^
343 (?:https?://|//) # http(s):// or protocol-independent URL
344 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
345 (?:www\.)?deturl\.com/www\.youtube\.com/|
346 (?:www\.)?pwnyoutube\.com/|
347 (?:www\.)?hooktube\.com/|
348 (?:www\.)?yourepeat\.com/|
349 tube\.majestyc\.net/|
350 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
351 (?:.*?\#/)? # handle anchor (#/) redirect urls
352 (?: # the various things that can precede the ID:
353 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
354 |(?: # or the v= param in all its forms
355 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
356 (?:\?|\#!?) # the params delimiter ? or # or #!
357 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
362 youtu\.be| # just youtu.be/xxxx
363 vid\.plus| # or vid.plus/xxxx
364 zwearz\.com/watch| # or zwearz.com/watch/xxxx
366 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
368 )? # all until now is optional -> you can pass the naked ID
369 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
372 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
373 WL # WL are handled by the watch later IE
376 (?(1).+)? # if we found the ID, everything can follow
377 $""" % {'playlist_id': YoutubeBaseInfoExtractor
._PLAYLIST
_ID
_RE
}
378 _NEXT_URL_RE
= r
'[\?&]next_url=([^&]+)'
380 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
381 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
382 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
383 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
384 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
385 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
386 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
387 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
388 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
389 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
390 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
391 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
392 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
393 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
394 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
395 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
396 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
397 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
401 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
402 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
403 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
404 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
405 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
406 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
407 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
409 # Apple HTTP Live Streaming
410 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
411 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
412 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
413 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
414 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
415 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
416 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
417 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
420 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
421 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
422 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
423 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
424 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
425 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
426 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
427 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
428 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
429 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
430 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
431 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
434 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
435 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
436 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
437 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
438 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
439 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
440 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
443 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
444 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
445 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
446 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
447 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
448 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
449 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
450 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
451 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
452 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
453 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
454 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
455 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
456 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
457 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
458 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
459 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
460 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
461 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
462 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
463 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
464 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
467 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
468 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
470 # Dash webm audio with opus inside
471 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
472 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
473 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
476 '_rtmp': {'protocol': 'rtmp'},
478 _SUBTITLE_FORMATS
= ('ttml', 'vtt')
485 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
489 'title': 'youtube-dl test video "\'/\\Ƥāš',
490 'uploader': 'Philipp Hagemeister',
491 'uploader_id': 'phihag',
492 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/phihag',
493 'upload_date': '20121002',
494 'license': 'Standard YouTube License',
495 'description': 'test chars: "\'/\\Ƥāš\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
496 'categories': ['Science & Technology'],
497 'tags': ['youtube-dl'],
500 'dislike_count': int,
506 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
507 'note': 'Test generic use_cipher_signature video (#897)',
511 'upload_date': '20120506',
512 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
513 'alt_title': 'I Love It (feat. Charli XCX)',
514 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
515 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
516 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
517 'iconic ep', 'iconic', 'love', 'it'],
519 'uploader': 'Icona Pop',
520 'uploader_id': 'IconaPop',
521 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/IconaPop',
522 'license': 'Standard YouTube License',
523 'creator': 'Icona Pop',
524 'track': 'I Love It (feat. Charli XCX)',
525 'artist': 'Icona Pop',
529 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
530 'note': 'Test VEVO video with age protection (#956)',
534 'upload_date': '20130703',
535 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
536 'alt_title': 'Tunnel Vision',
537 'description': 'md5:64249768eec3bc4276236606ea996373',
539 'uploader': 'justintimberlakeVEVO',
540 'uploader_id': 'justintimberlakeVEVO',
541 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
542 'license': 'Standard YouTube License',
543 'creator': 'Justin Timberlake',
544 'track': 'Tunnel Vision',
545 'artist': 'Justin Timberlake',
550 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
551 'note': 'Embed-only video (#1746)',
555 'upload_date': '20120608',
556 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
557 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
558 'uploader': 'SET India',
559 'uploader_id': 'setindia',
560 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/setindia',
561 'license': 'Standard YouTube License',
566 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
567 'note': 'Use the first video ID in the URL',
571 'title': 'youtube-dl test video "\'/\\Ƥāš',
572 'uploader': 'Philipp Hagemeister',
573 'uploader_id': 'phihag',
574 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/phihag',
575 'upload_date': '20121002',
576 'license': 'Standard YouTube License',
577 'description': 'test chars: "\'/\\Ƥāš\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
578 'categories': ['Science & Technology'],
579 'tags': ['youtube-dl'],
582 'dislike_count': int,
585 'skip_download': True,
589 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
590 'note': '256k DASH audio (format 141) via DASH manifest',
594 'upload_date': '20121002',
595 'uploader_id': '8KVIDEO',
596 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
598 'uploader': '8KVIDEO',
599 'license': 'Standard YouTube License',
600 'title': 'UHDTV TEST 8K VIDEO.mp4'
603 'youtube_include_dash_manifest': True,
606 'skip': 'format 141 not served anymore',
608 # DASH manifest with encrypted signature
610 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
614 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
615 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
617 'uploader': 'AfrojackVEVO',
618 'uploader_id': 'AfrojackVEVO',
619 'upload_date': '20131011',
620 'license': 'Standard YouTube License',
623 'youtube_include_dash_manifest': True,
624 'format': '141/bestaudio[ext=m4a]',
627 # JS player signature function name containing $
629 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
633 'title': 'Taylor Swift - Shake It Off',
634 'alt_title': 'Shake It Off',
635 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
637 'uploader': 'TaylorSwiftVEVO',
638 'uploader_id': 'TaylorSwiftVEVO',
639 'upload_date': '20140818',
640 'license': 'Standard YouTube License',
641 'creator': 'Taylor Swift',
644 'youtube_include_dash_manifest': True,
645 'format': '141/bestaudio[ext=m4a]',
650 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
655 'upload_date': '20100909',
656 'uploader': 'TJ Kirk',
657 'uploader_id': 'TheAmazingAtheist',
658 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
659 'license': 'Standard YouTube License',
660 'title': 'Burning Everyone\'s Koran',
661 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
664 # Normal age-gate video (No vevo, embed allowed)
666 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
670 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
671 'description': r
're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
673 'uploader': 'The Witcher',
674 'uploader_id': 'WitcherGame',
675 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
676 'upload_date': '20140605',
677 'license': 'Standard YouTube License',
681 # Age-gate video with encrypted signature
683 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
687 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
688 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
690 'uploader': 'LloydVEVO',
691 'uploader_id': 'LloydVEVO',
692 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
693 'upload_date': '20110629',
694 'license': 'Standard YouTube License',
698 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
699 # YouTube Red ad is not captured for creator
701 'url': '__2ABJjxzNo',
706 'upload_date': '20100430',
707 'uploader_id': 'deadmau5',
708 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/deadmau5',
709 'creator': 'deadmau5',
710 'description': 'md5:12c56784b8032162bb936a5f76d55360',
711 'uploader': 'deadmau5',
712 'license': 'Standard YouTube License',
713 'title': 'Deadmau5 - Some Chords (HD)',
714 'alt_title': 'Some Chords',
716 'expected_warnings': [
717 'DASH manifest missing',
720 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
722 'url': 'lqQg6PlCWgI',
727 'upload_date': '20150827',
728 'uploader_id': 'olympic',
729 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/olympic',
730 'license': 'Standard YouTube License',
731 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
732 'uploader': 'Olympic',
733 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
736 'skip_download': 'requires avconv',
741 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
745 'stretched_ratio': 16 / 9.,
747 'upload_date': '20110310',
748 'uploader_id': 'AllenMeow',
749 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
750 'description': 'made by Wacom from Korea | åå¹&å ę²¹ę·»é by TY\'s Allen | ęč¬heylisa00cavey1001ååøē±ę
ęä¾ę¢åēæ»čÆ',
751 'uploader': 'å«įį
',
752 'license': 'Standard YouTube License',
753 'title': '[A-made] č®ę
å¦åå¹ē å¤Ŗå¦ ęå°±ęÆéęØ£ēäŗŗ',
756 # url_encoded_fmt_stream_map is empty string
758 'url': 'qEJwOuvDf7I',
762 'title': 'ŠŠ±ŃŃŠ¶Š“ŠµŠ½ŠøŠµ ŃŃŠ“ŠµŠ±Š½Š¾Š¹ ŠæŃŠ°ŠŗŃŠøŠŗŠø ŠæŠ¾ Š²ŃŠ±Š¾ŃŠ°Š¼ 14 ŃŠµŠ½ŃŃŠ±ŃŃ 2014 Š³Š¾Š“Š° Š² Š”Š°Š½ŠŗŃ-ŠŠµŃŠµŃŠ±ŃŃŠ³Šµ',
764 'upload_date': '20150404',
765 'uploader_id': 'spbelect',
766 'uploader': 'ŠŠ°Š±Š»ŃŠ“Š°ŃŠµŠ»Šø ŠŠµŃŠµŃŠ±ŃŃŠ³Š°',
769 'skip_download': 'requires avconv',
771 'skip': 'This live event has ended.',
773 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
775 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
779 'title': 'md5:7b81415841e02ecd4313668cde88737a',
780 'description': 'md5:116377fd2963b81ec4ce64b542173306',
782 'upload_date': '20150625',
783 'uploader_id': 'dorappi2000',
784 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
785 'uploader': 'dorappi2000',
786 'license': 'Standard YouTube License',
787 'formats': 'mincount:31',
789 'skip': 'not actual anymore',
791 # DASH manifest with segment_list
793 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
794 'md5': '8ce563a1d667b599d21064e982ab9e31',
798 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
799 'uploader': 'Airtek',
800 'description': 'RetransmisiĆ³n en directo de la XVIII media maratĆ³n de Zaragoza.',
801 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
802 'license': 'Standard YouTube License',
803 'title': 'RetransmisiĆ³n XVIII Media maratĆ³n Zaragoza 2015',
806 'youtube_include_dash_manifest': True,
807 'format': '135', # bestvideo
809 'skip': 'This live event has ended.',
812 # Multifeed videos (multiple cameras), URL is for Main Camera
813 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
816 'title': 'teamPGP: Rocket League Noob Stream',
817 'description': 'md5:dc7872fb300e143831327f1bae3af010',
823 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
824 'description': 'md5:dc7872fb300e143831327f1bae3af010',
826 'upload_date': '20150721',
827 'uploader': 'Beer Games Beer',
828 'uploader_id': 'beergamesbeer',
829 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
830 'license': 'Standard YouTube License',
836 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
837 'description': 'md5:dc7872fb300e143831327f1bae3af010',
839 'upload_date': '20150721',
840 'uploader': 'Beer Games Beer',
841 'uploader_id': 'beergamesbeer',
842 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
843 'license': 'Standard YouTube License',
849 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
850 'description': 'md5:dc7872fb300e143831327f1bae3af010',
852 'upload_date': '20150721',
853 'uploader': 'Beer Games Beer',
854 'uploader_id': 'beergamesbeer',
855 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
856 'license': 'Standard YouTube License',
862 'title': 'teamPGP: Rocket League Noob Stream (zim)',
863 'description': 'md5:dc7872fb300e143831327f1bae3af010',
865 'upload_date': '20150721',
866 'uploader': 'Beer Games Beer',
867 'uploader_id': 'beergamesbeer',
868 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
869 'license': 'Standard YouTube License',
873 'skip_download': True,
877 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
878 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
881 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
884 'skip': 'Not multifeed anymore',
887 'url': 'https://vid.plus/FlRa-iH7PGw',
888 'only_matching': True,
891 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
892 'only_matching': True,
895 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
896 # Also tests cut-off URL expansion in video description (see
897 # https://github.com/rg3/youtube-dl/issues/1892,
898 # https://github.com/rg3/youtube-dl/issues/8164)
899 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
903 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
904 'alt_title': 'Dark Walk - Position Music',
905 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
907 'upload_date': '20151119',
908 'uploader_id': 'IronSoulElf',
909 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
910 'uploader': 'IronSoulElf',
911 'license': 'Standard YouTube License',
912 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
913 'track': 'Dark Walk - Position Music',
914 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
917 'skip_download': True,
921 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
922 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
923 'only_matching': True,
926 # Video with yt:stretch=17:0
927 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
931 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
932 'description': 'md5:ee18a25c350637c8faff806845bddee9',
933 'upload_date': '20151107',
934 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
935 'uploader': 'CH GAMER DROID',
938 'skip_download': True,
940 'skip': 'This video does not exist.',
943 # Video licensed under Creative Commons
944 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
948 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
949 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
951 'upload_date': '20150127',
952 'uploader_id': 'BerkmanCenter',
953 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
954 'uploader': 'The Berkman Klein Center for Internet & Society',
955 'license': 'Creative Commons Attribution license (reuse allowed)',
958 'skip_download': True,
962 # Channel-like uploader_url
963 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
967 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
968 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
970 'upload_date': '20151119',
971 'uploader': 'Bernie Sanders',
972 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
973 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
974 'license': 'Creative Commons Attribution license (reuse allowed)',
977 'skip_download': True,
981 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
982 'only_matching': True,
985 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
986 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
987 'only_matching': True,
990 # Rental video preview
991 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
995 'title': 'Piku - Trailer',
996 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
997 'upload_date': '20150811',
998 'uploader': 'FlixMatrix',
999 'uploader_id': 'FlixMatrixKaravan',
1000 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1001 'license': 'Standard YouTube License',
1004 'skip_download': True,
1006 'skip': 'This video is not available.',
1009 # YouTube Red video with episode data
1010 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1012 'id': 'iqKdEhx-dD4',
1014 'title': 'Isolation - Mind Field (Ep 1)',
1015 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1017 'upload_date': '20170118',
1018 'uploader': 'Vsauce',
1019 'uploader_id': 'Vsauce',
1020 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1021 'license': 'Standard YouTube License',
1022 'series': 'Mind Field',
1024 'episode_number': 1,
1027 'skip_download': True,
1029 'expected_warnings': [
1030 'Skipping DASH manifest',
1034 # The following content has been identified by the YouTube community
1035 # as inappropriate or offensive to some audiences.
1036 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1038 'id': '6SJNVb0GnPI',
1040 'title': 'Race Differences in Intelligence',
1041 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1043 'upload_date': '20140124',
1044 'uploader': 'New Century Foundation',
1045 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1046 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1047 'license': 'Standard YouTube License',
1050 'skip_download': True,
1055 'url': '1t24XAntNCY',
1056 'only_matching': True,
1059 # geo restricted to JP
1060 'url': 'sJL6WA-aGkQ',
1061 'only_matching': True,
1064 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1065 'only_matching': True,
def __init__(self, *args, **kwargs):
    """Initialize the extractor and set up the per-instance player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature cache id) -> signature decryption function,
    # so each player script is only downloaded/parsed once per run.
    self._player_cache = {}
def report_video_info_webpage_download(self, video_id):
    """Report attempt to download video info webpage."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Report attempt to extract video information."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Report that the given format is not available for this video."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
def report_rtmp_download(self):
    """Indicate the download will use the RTMP protocol."""
    self.to_screen('RTMP download detected')
1089 def _signature_cache_id(self
, example_sig
):
1090 """ Return a string representation of a signature """
1091 return '.'.join(compat_str(len(part
)) for part
in example_sig
.split('.'))
1093 def _extract_signature_function(self
, video_id
, player_url
, example_sig
):
1095 r
'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1098 raise ExtractorError('Cannot identify player %r' % player_url
)
1099 player_type
= id_m
.group('ext')
1100 player_id
= id_m
.group('id')
1102 # Read from filesystem cache
1103 func_id
= '%s_%s_%s' % (
1104 player_type
, player_id
, self
._signature
_cache
_id
(example_sig
))
1105 assert os
.path
.basename(func_id
) == func_id
1107 cache_spec
= self
._downloader
.cache
.load('youtube-sigfuncs', func_id
)
1108 if cache_spec
is not None:
1109 return lambda s
: ''.join(s
[i
] for i
in cache_spec
)
1112 'Downloading player %s' % player_url
1113 if self
._downloader
.params
.get('verbose') else
1114 'Downloading %s player %s' % (player_type
, player_id
)
1116 if player_type
== 'js':
1117 code
= self
._download
_webpage
(
1118 player_url
, video_id
,
1120 errnote
='Download of %s failed' % player_url
)
1121 res
= self
._parse
_sig
_js
(code
)
1122 elif player_type
== 'swf':
1123 urlh
= self
._request
_webpage
(
1124 player_url
, video_id
,
1126 errnote
='Download of %s failed' % player_url
)
1128 res
= self
._parse
_sig
_swf
(code
)
1130 assert False, 'Invalid player type %r' % player_type
1132 test_string
= ''.join(map(compat_chr
, range(len(example_sig
))))
1133 cache_res
= res(test_string
)
1134 cache_spec
= [ord(c
) for c
in cache_res
]
1136 self
._downloader
.cache
.store('youtube-sigfuncs', func_id
, cache_spec
)
1139 def _print_sig_code(self
, func
, example_sig
):
1140 def gen_sig_code(idxs
):
1141 def _genslice(start
, end
, step
):
1142 starts
= '' if start
== 0 else str(start
)
1143 ends
= (':%d' % (end
+ step
)) if end
+ step
>= 0 else ':'
1144 steps
= '' if step
== 1 else (':%d' % step
)
1145 return 's[%s%s%s]' % (starts
, ends
, steps
)
1148 # Quelch pyflakes warnings - start will be set when step is set
1149 start
= '(Never used)'
1150 for i
, prev
in zip(idxs
[1:], idxs
[:-1]):
1151 if step
is not None:
1152 if i
- prev
== step
:
1154 yield _genslice(start
, prev
, step
)
1157 if i
- prev
in [-1, 1]:
1162 yield 's[%d]' % prev
1166 yield _genslice(start
, i
, step
)
1168 test_string
= ''.join(map(compat_chr
, range(len(example_sig
))))
1169 cache_res
= func(test_string
)
1170 cache_spec
= [ord(c
) for c
in cache_res
]
1171 expr_code
= ' + '.join(gen_sig_code(cache_spec
))
1172 signature_id_tuple
= '(%s)' % (
1173 ', '.join(compat_str(len(p
)) for p
in example_sig
.split('.')))
1174 code
= ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1175 ' return %s\n') % (signature_id_tuple
, expr_code
)
1176 self
.to_screen('Extracted signature function:\n' + code
)
1178 def _parse_sig_js(self
, jscode
):
1179 funcname
= self
._search
_regex
(
1180 (r
'(["\'])signature\
1\s
*,\s
*(?P
<sig
>[a
-zA
-Z0
-9$
]+)\
(',
1181 r'\
.sig\|\|
(?P
<sig
>[a
-zA
-Z0
-9$
]+)\
(',
1182 r'yt\
.akamaized\
.net
/\
)\s
*\|\|\s
*.*?\s
*c\s
*&&\s
*d\
.set\
([^
,]+\s
*,\s
*(?P
<sig
>[a
-zA
-Z0
-9$
]+)\
(',
1183 r'\bc\s
*&&\s
*d\
.set\
([^
,]+\s
*,\s
*(?P
<sig
>[a
-zA
-Z0
-9$
]+)\
('),
1184 jscode, 'Initial JS player signature function name
', group='sig
')
1186 jsi = JSInterpreter(jscode)
1187 initial_function = jsi.extract_function(funcname)
1188 return lambda s: initial_function([s])
1190 def _parse_sig_swf(self, file_contents):
1191 swfi = SWFInterpreter(file_contents)
1192 TARGET_CLASSNAME = 'SignatureDecipher
'
1193 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1194 initial_function = swfi.extract_function(searched_class, 'decipher
')
1195 return lambda s: initial_function([s])
1197 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1198 """Turn the encrypted s field into a working signature"""
1200 if player_url is None:
1201 raise ExtractorError('Cannot decrypt signature without player_url
')
1203 if player_url.startswith('//'):
1204 player_url = 'https
:' + player_url
1205 elif not re.match(r'https?
://', player_url):
1206 player_url = compat_urlparse.urljoin(
1207 'https
://www
.youtube
.com
', player_url)
1209 player_id = (player_url, self._signature_cache_id(s))
1210 if player_id not in self._player_cache:
1211 func = self._extract_signature_function(
1212 video_id, player_url, s
1214 self._player_cache[player_id] = func
1215 func = self._player_cache[player_id]
1216 if self._downloader.params.get('youtube_print_sig_code
'):
1217 self._print_sig_code(func, s)
1219 except Exception as e:
1220 tb = traceback.format_exc()
1221 raise ExtractorError(
1222 'Signature extraction failed
: ' + tb, cause=e)
1224 def _get_subtitles(self, video_id, webpage):
1226 subs_doc = self._download_xml(
1227 'https
://video
.google
.com
/timedtext?hl
=en
&type=list&v
=%s' % video_id,
1228 video_id, note=False)
1229 except ExtractorError as err:
1230 self._downloader.report_warning('unable to download video subtitles
: %s' % error_to_compat_str(err))
1234 for track in subs_doc.findall('track
'):
1235 lang = track.attrib['lang_code
']
1236 if lang in sub_lang_list:
1239 for ext in self._SUBTITLE_FORMATS:
1240 params = compat_urllib_parse_urlencode({
1244 'name
': track.attrib['name
'].encode('utf
-8'),
1246 sub_formats.append({
1247 'url
': 'https
://www
.youtube
.com
/api
/timedtext?
' + params,
1250 sub_lang_list[lang] = sub_formats
1251 if not sub_lang_list:
1252 self._downloader.report_warning('video doesn
\'t have subtitles
')
1254 return sub_lang_list
1256 def _get_ytplayer_config(self, video_id, webpage):
1258 # User data may contain arbitrary character sequences that may affect
1259 # JSON extraction with regex, e.g. when '};' is contained the second
1260 # regex won't capture the whole JSON
. Yet working around by trying more
1261 # concrete regex first keeping in mind proper quoted string handling
1262 # to be implemented in future that will replace this workaround (see
1263 # https://github.com/rg3/youtube-dl/issues/7468,
1264 # https://github.com/rg3/youtube-dl/pull/7599)
1265 r
';ytplayer\.config\s*=\s*({.+?});ytplayer',
1266 r
';ytplayer\.config\s*=\s*({.+?});',
1268 config
= self
._search
_regex
(
1269 patterns
, webpage
, 'ytplayer.config', default
=None)
1271 return self
._parse
_json
(
1272 uppercase_escape(config
), video_id
, fatal
=False)
1274 def _get_automatic_captions(self
, video_id
, webpage
):
1275 """We need the webpage for getting the captions url, pass it as an
1276 argument to speed up the process."""
1277 self
.to_screen('%s: Looking for automatic captions' % video_id
)
1278 player_config
= self
._get
_ytplayer
_config
(video_id
, webpage
)
1279 err_msg
= 'Couldn\'t find automatic captions for %s' % video_id
1280 if not player_config
:
1281 self
._downloader
.report_warning(err_msg
)
1284 args
= player_config
['args']
1285 caption_url
= args
.get('ttsurl')
1287 timestamp
= args
['timestamp']
1288 # We get the available subtitles
1289 list_params
= compat_urllib_parse_urlencode({
1294 list_url
= caption_url
+ '&' + list_params
1295 caption_list
= self
._download
_xml
(list_url
, video_id
)
1296 original_lang_node
= caption_list
.find('track')
1297 if original_lang_node
is None:
1298 self
._downloader
.report_warning('Video doesn\'t have automatic captions')
1300 original_lang
= original_lang_node
.attrib
['lang_code']
1301 caption_kind
= original_lang_node
.attrib
.get('kind', '')
1304 for lang_node
in caption_list
.findall('target'):
1305 sub_lang
= lang_node
.attrib
['lang_code']
1307 for ext
in self
._SUBTITLE
_FORMATS
:
1308 params
= compat_urllib_parse_urlencode({
1309 'lang': original_lang
,
1313 'kind': caption_kind
,
1315 sub_formats
.append({
1316 'url': caption_url
+ '&' + params
,
1319 sub_lang_list
[sub_lang
] = sub_formats
1320 return sub_lang_list
1322 def make_captions(sub_url
, sub_langs
):
1323 parsed_sub_url
= compat_urllib_parse_urlparse(sub_url
)
1324 caption_qs
= compat_parse_qs(parsed_sub_url
.query
)
1326 for sub_lang
in sub_langs
:
1328 for ext
in self
._SUBTITLE
_FORMATS
:
1330 'tlang': [sub_lang
],
1333 sub_url
= compat_urlparse
.urlunparse(parsed_sub_url
._replace
(
1334 query
=compat_urllib_parse_urlencode(caption_qs
, True)))
1335 sub_formats
.append({
1339 captions
[sub_lang
] = sub_formats
1342 # New captions format as of 22.06.2017
1343 player_response
= args
.get('player_response')
1344 if player_response
and isinstance(player_response
, compat_str
):
1345 player_response
= self
._parse
_json
(
1346 player_response
, video_id
, fatal
=False)
1348 renderer
= player_response
['captions']['playerCaptionsTracklistRenderer']
1349 base_url
= renderer
['captionTracks'][0]['baseUrl']
1351 for lang
in renderer
['translationLanguages']:
1352 lang_code
= lang
.get('languageCode')
1354 sub_lang_list
.append(lang_code
)
1355 return make_captions(base_url
, sub_lang_list
)
1357 # Some videos don't provide ttsurl but rather caption_tracks and
1358 # caption_translation_languages (e.g. 20LmZk1hakA)
1359 # Does not used anymore as of 22.06.2017
1360 caption_tracks
= args
['caption_tracks']
1361 caption_translation_languages
= args
['caption_translation_languages']
1362 caption_url
= compat_parse_qs(caption_tracks
.split(',')[0])['u'][0]
1364 for lang
in caption_translation_languages
.split(','):
1365 lang_qs
= compat_parse_qs(compat_urllib_parse_unquote_plus(lang
))
1366 sub_lang
= lang_qs
.get('lc', [None])[0]
1368 sub_lang_list
.append(sub_lang
)
1369 return make_captions(caption_url
, sub_lang_list
)
1370 # An extractor error can be raise by the download process if there are
1371 # no automatic captions but there are subtitles
1372 except (KeyError, IndexError, ExtractorError
):
1373 self
._downloader
.report_warning(err_msg
)
1376 def _mark_watched(self
, video_id
, video_info
):
1377 playback_url
= video_info
.get('videostats_playback_base_url', [None])[0]
1378 if not playback_url
:
1380 parsed_playback_url
= compat_urlparse
.urlparse(playback_url
)
1381 qs
= compat_urlparse
.parse_qs(parsed_playback_url
.query
)
1383 # cpn generation algorithm is reverse engineered from base.js.
1384 # In fact it works even with dummy cpn.
1385 CPN_ALPHABET
= 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1386 cpn
= ''.join((CPN_ALPHABET
[random
.randint(0, 256) & 63] for _
in range(0, 16)))
1392 playback_url
= compat_urlparse
.urlunparse(
1393 parsed_playback_url
._replace
(query
=compat_urllib_parse_urlencode(qs
, True)))
1395 self
._download
_webpage
(
1396 playback_url
, video_id
, 'Marking watched',
1397 'Unable to mark watched', fatal
=False)
1400 def _extract_urls(webpage
):
1401 # Embedded YouTube player
1403 unescapeHTML(mobj
.group('url'))
1404 for mobj
in re
.finditer(r
'''(?x)
1414 (?P
<url
>(?
:https?
:)?
//(?
:www\
.)?
youtube(?
:-nocookie
)?\
.com
/
1415 (?
:embed|v|p
)/[0-9A
-Za
-z_
-]{11}
.*?
)
1418 # lazyYT YouTube embed
1419 entries.extend(list(map(
1421 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1423 # Wordpress "YouTube Video Importer" plugin
1424 matches = re.findall(r'''(?x
)<div
[^
>]+
1425 class=(?P
<q1
>[\'"])[^\'"]*\byvii
_single
_video
_player
\b[^
\'"]*(?P=q1)[^>]+
1426 data-video_id=(?P<q2>[\'"])([^
\'"]+)(?P=q2)''', webpage)
1427 entries.extend(m[-1] for m in matches)
1432 def _extract_url(webpage):
1433 urls = YoutubeIE._extract_urls(webpage)
1434 return urls[0] if urls else None
1437 def extract_id(cls, url):
1438 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1440 raise ExtractorError('Invalid URL: %s' % url)
1441 video_id = mobj.group(2)
1444 def _extract_annotations(self, video_id):
1445 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1446 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1449 def _extract_chapters(description, duration):
1452 chapter_lines = re.findall(
1453 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\
.www\
.watch\
.player\
.seekTo
[^
>]+>(\d
{1,2}:\d
{1,2}(?
::\d
{1,2})?
)</a
>[^
>]*)(?
=$|
<br\s
*/>)',
1455 if not chapter_lines:
1458 for next_num, (chapter_line, time_point) in enumerate(
1459 chapter_lines, start=1):
1460 start_time = parse_duration(time_point)
1461 if start_time is None:
1463 if start_time > duration:
1465 end_time = (duration if next_num == len(chapter_lines)
1466 else parse_duration(chapter_lines[next_num][1]))
1467 if end_time is None:
1469 if end_time > duration:
1471 if start_time > end_time:
1473 chapter_title = re.sub(
1474 r'<a
[^
>]+>[^
<]+</a
>', '', chapter_line).strip(' \t-')
1475 chapter_title = re.sub(r'\s
+', ' ', chapter_title)
1477 'start_time
': start_time,
1478 'end_time
': end_time,
1479 'title
': chapter_title,
1483 def _real_extract(self, url):
1484 url, smuggled_data = unsmuggle_url(url, {})
1487 'http
' if self._downloader.params.get('prefer_insecure
', False)
1492 parsed_url = compat_urllib_parse_urlparse(url)
1493 for component in [parsed_url.fragment, parsed_url.query]:
1494 query = compat_parse_qs(component)
1495 if start_time is None and 't
' in query:
1496 start_time = parse_duration(query['t
'][0])
1497 if start_time is None and 'start
' in query:
1498 start_time = parse_duration(query['start
'][0])
1499 if end_time is None and 'end
' in query:
1500 end_time = parse_duration(query['end
'][0])
1502 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1503 mobj = re.search(self._NEXT_URL_RE, url)
1505 url = proto + '://www
.youtube
.com
/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1506 video_id = self.extract_id(url)
1509 url = proto + '://www
.youtube
.com
/watch?v
=%s&gl
=US
&hl
=en
&has_verified
=1&bpctr
=9999999999' % video_id
1510 video_webpage = self._download_webpage(url, video_id)
1512 # Attempt to extract SWF player URL
1513 mobj = re.search(r'swfConfig
.*?
"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1514 if mobj is not None:
1515 player_url = re.sub(r'\\(.)', r'\
1', mobj.group(1))
1521 def add_dash_mpd(video_info):
1522 dash_mpd = video_info.get('dashmpd
')
1523 if dash_mpd and dash_mpd[0] not in dash_mpds:
1524 dash_mpds.append(dash_mpd[0])
1529 def extract_view_count(v_info):
1530 return int_or_none(try_get(v_info, lambda x: x['view_count
'][0]))
1533 embed_webpage = None
1534 if re.search(r'player
-age
-gate
-content
">', video_webpage) is not None:
1536 # We simulate the access to the video from www.youtube.com/v/{video_id}
1537 # this can be viewed without login into Youtube
1538 url = proto + '://www.youtube.com/embed/%s' % video_id
1539 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1540 data = compat_urllib_parse_urlencode({
1541 'video_id': video_id,
1542 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1543 'sts': self._search_regex(
1544 r'"sts
"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1546 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1547 video_info_webpage = self._download_webpage(
1548 video_info_url, video_id,
1549 note='Refetching age-gated info webpage',
1550 errnote='unable to download video info webpage')
1551 video_info = compat_parse_qs(video_info_webpage)
1552 add_dash_mpd(video_info)
1557 # Try looking directly into the video webpage
1558 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1560 args = ytplayer_config['args']
1561 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1562 # Convert to the same format returned by compat_parse_qs
1563 video_info = dict((k, [v]) for k, v in args.items())
1564 add_dash_mpd(video_info)
1565 # Rental video is not rented but preview is available (e.g.
1566 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1567 # https://github.com/rg3/youtube-dl/issues/10532)
1568 if not video_info and args.get('ypc_vid'):
1569 return self.url_result(
1570 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1571 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1573 sts = ytplayer_config.get('sts')
1574 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1575 # We also try looking in get_video_info since it may contain different dashmpd
1576 # URL that points to a DASH manifest with possibly different itag set (some itags
1577 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1578 # manifest pointed by get_video_info's dashmpd).
1579 # The general idea is to take a union of itags of both DASH manifests (for example
1580 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1581 self.report_video_info_webpage_download(video_id)
1582 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1584 'video_id': video_id,
1594 video_info_webpage = self._download_webpage(
1595 '%s://www.youtube.com/get_video_info' % proto,
1596 video_id, note=False,
1597 errnote='unable to download video info webpage',
1598 fatal=False, query=query)
1599 if not video_info_webpage:
1601 get_video_info = compat_parse_qs(video_info_webpage)
1602 add_dash_mpd(get_video_info)
1603 if view_count is None:
1604 view_count = extract_view_count(get_video_info)
1606 video_info = get_video_info
1607 if 'token' in get_video_info:
1608 # Different get_video_info requests may report different results, e.g.
1609 # some may report video unavailability, but some may serve it without
1610 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1611 # the original webpage as well as el=info and el=embedded get_video_info
1612 # requests report video unavailability due to geo restriction while
1613 # el=detailpage succeeds and returns valid data). This is probably
1614 # due to YouTube measures against IP ranges of hosting providers.
1615 # Working around by preferring the first succeeded video_info containing
1616 # the token if no such video_info yet was found.
1617 if 'token' not in video_info:
1618 video_info = get_video_info
1621 def extract_unavailable_message():
1622 return self._html_search_regex(
1623 r'(?s)<h1[^>]+id="unavailable
-message
"[^>]*>(.+?)</h1>',
1624 video_webpage, 'unavailable message', default=None)
1626 if 'token' not in video_info:
1627 if 'reason' in video_info:
1628 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1629 regions_allowed = self._html_search_meta(
1630 'regionsAllowed', video_webpage, default=None)
1631 countries = regions_allowed.split(',') if regions_allowed else None
1632 self.raise_geo_restricted(
1633 msg=video_info['reason'][0], countries=countries)
1634 reason = video_info['reason'][0]
1635 if 'Invalid parameters' in reason:
1636 unavailable_message = extract_unavailable_message()
1637 if unavailable_message:
1638 reason = unavailable_message
1639 raise ExtractorError(
1640 'YouTube said: %s' % reason,
1641 expected=True, video_id=video_id)
1643 raise ExtractorError(
1644 '"token
" parameter not in video info for unknown reason',
1648 if 'title' in video_info:
1649 video_title = video_info['title'][0]
1651 self._downloader.report_warning('Unable to extract video title')
1655 description_original = video_description = get_element_by_id("eow
-description
", video_webpage)
1656 if video_description:
1659 redir_url = compat_urlparse.urljoin(url, m.group(1))
1660 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1661 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1662 qs = compat_parse_qs(parsed_redir_url.query)
1668 description_original = video_description = re.sub(r'''(?x)
1670 (?:[a-zA-Z-]+="[^
"]*"\s
+)*?
1671 (?
:title|href
)="([^"]+)"\s+
1672 (?:[a-zA-Z-]+="[^
"]*"\s
+)*?
1676 ''', replace_url, video_description)
1677 video_description = clean_html(video_description)
1679 fd_mobj = re.search(r'<meta name="description
" content="([^
"]+)"', video_webpage)
1681 video_description = unescapeHTML(fd_mobj.group(1))
1683 video_description = ''
1685 if 'multifeed_metadata_list
' in video_info and not smuggled_data.get('force_singlefeed
', False):
1686 if not self._downloader.params.get('noplaylist
'):
1689 multifeed_metadata_list = video_info['multifeed_metadata_list
'][0]
1690 for feed in multifeed_metadata_list.split(','):
1691 # Unquote should take place before split on comma (,) since textual
1692 # fields may contain comma as well (see
1693 # https://github.com/rg3/youtube-dl/issues/8536)
1694 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1696 '_type
': 'url_transparent
',
1697 'ie_key
': 'Youtube
',
1699 '%s://www
.youtube
.com
/watch?v
=%s' % (proto, feed_data['id'][0]),
1700 {'force_singlefeed
': True}),
1701 'title
': '%s (%s)' % (video_title, feed_data['title
'][0]),
1703 feed_ids.append(feed_data['id'][0])
1705 'Downloading multifeed
video (%s) - add
--no
-playlist to just download video
%s'
1706 % (', '.join(feed_ids), video_id))
1707 return self.playlist_result(entries, video_id, video_title, video_description)
1708 self.to_screen('Downloading just video
%s because of
--no
-playlist
' % video_id)
1710 if view_count is None:
1711 view_count = extract_view_count(video_info)
1713 # Check for "rental" videos
1714 if 'ypc_video_rental_bar_text
' in video_info and 'author
' not in video_info:
1715 raise ExtractorError('"rental" videos
not supported
. See https
://github
.com
/rg3
/youtube
-dl
/issues
/359 for more information
.', expected=True)
1717 def _extract_filesize(media_url):
1718 return int_or_none(self._search_regex(
1719 r'\bclen
[=/](\d
+)', media_url, 'filesize
', default=None))
1721 if 'conn
' in video_info and video_info['conn
'][0].startswith('rtmp
'):
1722 self.report_rtmp_download()
1724 'format_id
': '_rtmp
',
1726 'url
': video_info['conn
'][0],
1727 'player_url
': player_url,
1729 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map
', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts
', [''])[0]) >= 1):
1730 encoded_url_map = video_info.get('url_encoded_fmt_stream_map
', [''])[0] + ',' + video_info.get('adaptive_fmts
', [''])[0]
1731 if 'rtmpe
%3Dyes
' in encoded_url_map:
1732 raise ExtractorError('rtmpe downloads are
not supported
, see https
://github
.com
/rg3
/youtube
-dl
/issues
/343 for more information
.', expected=True)
1734 fmt_list = video_info.get('fmt_list
', [''])[0]
1736 for fmt in fmt_list.split(','):
1737 spec = fmt.split('/')
1739 width_height = spec[1].split('x
')
1740 if len(width_height) == 2:
1741 formats_spec[spec[0]] = {
1742 'resolution
': spec[1],
1743 'width
': int_or_none(width_height[0]),
1744 'height
': int_or_none(width_height[1]),
1746 q = qualities(['small
', 'medium
', 'hd720
'])
1748 for url_data_str in encoded_url_map.split(','):
1749 url_data = compat_parse_qs(url_data_str)
1750 if 'itag
' not in url_data or 'url
' not in url_data:
1752 format_id = url_data['itag
'][0]
1753 url = url_data['url
'][0]
1755 if 's
' in url_data or self._downloader.params.get('youtube_include_dash_manifest
', True):
1756 ASSETS_RE = r'"assets":.+?
"js":\s
*("[^"]+")'
1757 jsplayer_url_json = self._search_regex(
1759 embed_webpage if age_gate else video_webpage,
1760 'JS player URL (1)', default=None)
1761 if not jsplayer_url_json and not age_gate:
1762 # We need the embed website after all
1763 if embed_webpage is None:
1764 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1765 embed_webpage = self._download_webpage(
1766 embed_url, video_id, 'Downloading embed webpage')
1767 jsplayer_url_json = self._search_regex(
1768 ASSETS_RE, embed_webpage, 'JS player URL')
1770 player_url = json.loads(jsplayer_url_json)
1771 if player_url is None:
1772 player_url_json = self._search_regex(
1773 r'ytplayer\.config.*?"url
"\s*:\s*("[^
"]+")',
1774 video_webpage, 'age gate player URL
')
1775 player_url = json.loads(player_url_json)
1777 if 'sig
' in url_data:
1778 url += '&signature
=' + url_data['sig
'][0]
1779 elif 's
' in url_data:
1780 encrypted_sig = url_data['s
'][0]
1782 if self._downloader.params.get('verbose
'):
1783 if player_url is None:
1784 player_version = 'unknown
'
1785 player_desc = 'unknown
'
1787 if player_url.endswith('swf
'):
1788 player_version = self._search_regex(
1789 r'-(.+?
)(?
:/watch_as3
)?\
.swf$
', player_url,
1790 'flash player
', fatal=False)
1791 player_desc = 'flash player
%s' % player_version
1793 player_version = self._search_regex(
1794 [r'html5player
-([^
/]+?
)(?
:/html5player(?
:-new
)?
)?\
.js
',
1795 r'(?
:www|player
)-([^
/]+)(?
:/[a
-z
]{2}_
[A
-Z
]{2}
)?
/base\
.js
'],
1797 'html5 player
', fatal=False)
1798 player_desc = 'html5 player
%s' % player_version
1800 parts_sizes = self._signature_cache_id(encrypted_sig)
1801 self.to_screen('{%s} signature length
%s, %s' %
1802 (format_id, parts_sizes, player_desc))
1804 signature = self._decrypt_signature(
1805 encrypted_sig, video_id, player_url, age_gate)
1806 url += '&signature
=' + signature
1807 if 'ratebypass
' not in url:
1808 url += '&ratebypass
=yes
'
1811 'format_id
': format_id,
1813 'player_url
': player_url,
1815 if format_id in self._formats:
1816 dct.update(self._formats[format_id])
1817 if format_id in formats_spec:
1818 dct.update(formats_spec[format_id])
1820 # Some itags are not included in DASH manifest thus corresponding formats will
1821 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1822 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1823 mobj = re.search(r'^
(?P
<width
>\d
+)[xX
](?P
<height
>\d
+)$
', url_data.get('size
', [''])[0])
1824 width, height = (int(mobj.group('width
')), int(mobj.group('height
'))) if mobj else (None, None)
1826 filesize = int_or_none(url_data.get(
1827 'clen
', [None])[0]) or _extract_filesize(url)
1829 quality = url_data.get('quality_label
', [None])[0] or url_data.get('quality
', [None])[0]
1832 'filesize
': filesize,
1833 'tbr
': float_or_none(url_data.get('bitrate
', [None])[0], 1000),
1836 'fps
': int_or_none(url_data.get('fps
', [None])[0]),
1837 'format_note
': quality,
1838 'quality
': q(quality),
1840 for key, value in more_fields.items():
1843 type_ = url_data.get('type', [None])[0]
1845 type_split = type_.split(';')
1846 kind_ext = type_split[0].split('/')
1847 if len(kind_ext) == 2:
1849 dct['ext
'] = mimetype2ext(type_split[0])
1850 if kind in ('audio
', 'video
'):
1852 for mobj in re.finditer(
1853 r'(?P
<key
>[a
-zA
-Z_
-]+)=(?P
<quote
>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1854 if mobj.group('key') == 'codecs':
1855 codecs = mobj.group('val')
1858 dct.update(parse_codecs(codecs))
1859 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1860 dct['downloader_options'] = {
1861 # Youtube throttles chunks >~10M
1862 'http_chunk_size': 10485760,
1865 elif video_info.get('hlsvp'):
1866 manifest_url = video_info['hlsvp'][0]
1868 m3u8_formats = self._extract_m3u8_formats(
1869 manifest_url, video_id, 'mp4', fatal=False)
1870 for a_format in m3u8_formats:
1871 itag = self._search_regex(
1872 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1874 a_format['format_id'] = itag
1875 if itag in self._formats:
1876 dct = self._formats[itag].copy()
1877 dct.update(a_format)
1879 a_format['player_url'] = player_url
1880 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1881 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1882 formats.append(a_format)
1884 error_message = clean_html(video_info.get('reason', [None])[0])
1885 if not error_message:
1886 error_message = extract_unavailable_message()
1888 raise ExtractorError(error_message, expected=True)
1889 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1892 video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1894 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1896 self._downloader.report_warning('unable to extract uploader name')
1899 video_uploader_id = None
1900 video_uploader_url = None
1902 r'<link itemprop="url
" href="(?P
<uploader_url
>https?
://www\
.youtube\
.com
/(?
:user|channel
)/(?P
<uploader_id
>[^
"]+))">',
1904 if mobj is not None:
1905 video_uploader_id = mobj.group('uploader_id
')
1906 video_uploader_url = mobj.group('uploader_url
')
1908 self._downloader.report_warning('unable to extract uploader nickname
')
1911 # We try first to get a high quality image:
1912 m_thumb = re.search(r'<span itemprop
="thumbnail".*?href
="(.*?)">',
1913 video_webpage, re.DOTALL)
1914 if m_thumb is not None:
1915 video_thumbnail = m_thumb.group(1)
1916 elif 'thumbnail_url
' not in video_info:
1917 self._downloader.report_warning('unable to extract video thumbnail
')
1918 video_thumbnail = None
1919 else: # don't panic
if we can
't find it
1920 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url
'][0])
1923 upload_date = self._html_search_meta(
1924 'datePublished
', video_webpage, 'upload date
', default=None)
1926 upload_date = self._search_regex(
1927 [r'(?s
)id="eow-date.*?>(.*?)</span>',
1928 r'(?:id="watch
-uploader
-info
".*?>.*?|["\']simpleText
["\']\s*:\s*["\'])(?
:Published|Uploaded|Streamed live|Started
) on (.+?
)[<"\']'],
1929 video_webpage, 'upload date', default=None)
1930 upload_date = unified_strdate(upload_date)
1932 video_license = self._html_search_regex(
1933 r'<h4[^>]+class="title
"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1934 video_webpage, 'license', default=None)
1936 m_music = re.search(
1938 <h4[^>]+class="title
"[^>]*>\s*Music\s*</h4>\s*
1946 \bhref=["\']/red
[^
>]*>|
# drop possible
1947 >\s
*Listen ad
-free
with YouTube Red
# YouTube Red ad
1954 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1955 video_creator = clean_html(m_music.group('creator'))
1957 video_alt_title = video_creator = None
1959 def extract_meta(field):
1960 return self._html_search_regex(
1961 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1962 video_webpage, field, default=None)
1964 track = extract_meta('Song')
1965 artist = extract_meta('Artist')
1967 m_episode = re.search(
1968 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*ā¢\s*E(?P<episode>\d+)</span>',
1971 series = m_episode.group('series')
1972 season_number = int(m_episode.group('season'))
1973 episode_number = int(m_episode.group('episode'))
1975 series = season_number = episode_number = None
1977 m_cat_container = self._search_regex(
1978 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1979 video_webpage, 'categories', default=None)
1981 category = self._html_search_regex(
1982 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1984 video_categories = None if category is None else [category]
1986 video_categories = None
1989 unescapeHTML(m.group('content'))
1990 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1992 def _extract_count(count_name):
1993 return str_to_int(self._search_regex(
1994 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1995 % re.escape(count_name),
1996 video_webpage, count_name, default=None))
1998 like_count = _extract_count('like')
1999 dislike_count = _extract_count('dislike')
2002 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2003 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2005 video_duration = try_get(
2006 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2007 if not video_duration:
2008 video_duration = parse_duration(self._html_search_meta(
2009 'duration', video_webpage, 'video duration'))
2012 video_annotations = None
2013 if self._downloader.params.get('writeannotations', False):
2014 video_annotations = self._extract_annotations(video_id)
2016 chapters = self._extract_chapters(description_original, video_duration)
2018 # Look for the DASH manifest
2019 if self._downloader.params.get('youtube_include_dash_manifest', True):
2020 dash_mpd_fatal = True
2021 for mpd_url in dash_mpds:
2024 def decrypt_sig(mobj):
2026 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2027 return '/signature/%s' % dec_s
2029 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2031 for df in self._extract_mpd_formats(
2032 mpd_url, video_id, fatal=dash_mpd_fatal,
2033 formats_dict=self._formats):
2034 if not df.get('filesize'):
2035 df['filesize'] = _extract_filesize(df['url'])
2036 # Do not overwrite DASH format found in some previous DASH manifest
2037 if df['format_id'] not in dash_formats:
2038 dash_formats[df['format_id']] = df
2039 # Additional DASH manifests may end up in HTTP Error 403 therefore
2040 # allow them to fail without bug report message if we already have
2041 # some DASH manifest succeeded. This is temporary workaround to reduce
2042 # burst of bug reports until we figure out the reason and whether it
2043 # can be fixed at all.
2044 dash_mpd_fatal = False
2045 except (ExtractorError, KeyError) as e:
2046 self.report_warning(
2047 'Skipping DASH manifest: %r' % e, video_id)
2049 # Remove the formats we found through non-DASH, they
2050 # contain less info and it can be wrong, because we use
2051 # fixed values (for example the resolution). See
2052 # https://github.com/rg3/youtube-dl/issues/5774 for an
2054 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2055 formats.extend(dash_formats.values())
2057 # Check for malformed aspect ratio
2058 stretched_m = re.search(
2059 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2062 w = float(stretched_m.group('w'))
2063 h = float(stretched_m.group('h'))
2064 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2065 # We will only process correct ratios.
2069 if f.get('vcodec') != 'none':
2070 f['stretched_ratio'] = ratio
2072 self._sort_formats(formats)
2074 self.mark_watched(video_id, video_info)
2078 'uploader': video_uploader,
2079 'uploader_id': video_uploader_id,
2080 'uploader_url': video_uploader_url,
2081 'upload_date': upload_date,
2082 'license': video_license,
2083 'creator': video_creator or artist,
2084 'title': video_title,
2085 'alt_title': video_alt_title or track,
2086 'thumbnail': video_thumbnail,
2087 'description': video_description,
2088 'categories': video_categories,
2090 'subtitles': video_subtitles,
2091 'automatic_captions': automatic_captions,
2092 'duration': video_duration,
2093 'age_limit': 18 if age_gate else 0,
2094 'annotations': video_annotations,
2095 'chapters': chapters,
2096 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2097 'view_count': view_count,
2098 'like_count': like_count,
2099 'dislike_count': dislike_count,
2100 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2103 'start_time': start_time,
2104 'end_time': end_time,
2106 'season_number': season_number,
2107 'episode_number': episode_number,
2113 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2114 IE_DESC = 'YouTube.com playlists'
2115 _VALID_URL = r"""(?x)(?:
2121 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2122 \? (?:.*?[&;])*? (?:p|a|list)=
2125 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2128 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2129 # Top tracks, they can also include dots
2135 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2136 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2137 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2138 IE_NAME = 'youtube:playlist'
2140 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2142 'title': 'ytdl test PL',
2143 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2145 'playlist_count': 3,
2147 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2149 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2150 'title': 'YDL_Empty_List',
2152 'playlist_count': 0,
2153 'skip': 'This playlist is private',
2155 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2156 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2158 'title': '29C3: Not my department',
2159 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2161 'playlist_count': 95,
2163 'note': 'issue #673',
2164 'url': 'PLBB231211A4F62143',
2166 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2167 'id': 'PLBB231211A4F62143',
2169 'playlist_mincount': 26,
2171 'note': 'Large playlist',
2172 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2174 'title': 'Uploads from Cauchemar',
2175 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2177 'playlist_mincount': 799,
2179 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2181 'title': 'YDL_safe_search',
2182 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2184 'playlist_count': 2,
2185 'skip': 'This playlist is private',
2188 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2189 'playlist_count': 4,
2192 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2195 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2196 'playlist_mincount': 485,
2198 'title': '2017 čÆčŖęę°å®ę² (2/24ę“ę°)',
2199 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2202 'note': 'Embedded SWF player',
2203 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2204 'playlist_count': 4,
2207 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2210 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2211 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2213 'title': 'Uploads from Interstellar Movie',
2214 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2216 'playlist_mincount': 21,
2218 # Playlist URL that does not actually serve a playlist
2219 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2221 'id': 'FqZTN594JQw',
2223 'title': "Smiley's People 01 detective, Adventure Series, Action",
2224 'uploader': 'STREEM',
2225 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2226 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2227 'upload_date': '20150526',
2228 'license': 'Standard YouTube License',
2229 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2230 'categories': ['People & Blogs'],
2233 'dislike_count': int,
2236 'skip_download': True,
2238 'add_ie': [YoutubeIE.ie_key()],
2240 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2242 'id': 'yeWKywCrFtk',
2244 'title': 'Small Scale Baler and Braiding Rugs',
2245 'uploader': 'Backus-Page House Museum',
2246 'uploader_id': 'backuspagemuseum',
2247 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2248 'upload_date': '20161008',
2249 'license': 'Standard YouTube License',
2250 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2251 'categories': ['Nonprofits & Activism'],
2254 'dislike_count': int,
2258 'skip_download': True,
2261 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2262 'only_matching': True,
2264 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2265 'only_matching': True,
2267 # music album playlist
2268 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2269 'only_matching': True,
2272 def _real_initialize(self):
2275 def _extract_mix(self, playlist_id):
2276 # The mixes are generated from a single video
2277 # the id of the playlist is just 'RD' + video_id
2279 last_id = playlist_id[-11:]
2280 for n in itertools.count(1):
2281 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2282 webpage = self._download_webpage(
2283 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2284 new_ids = orderedSet(re.findall(
2285 r'''(?xs
)data
-video
-username
=".*?".*?
2286 href
="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?
list=%s''' % re.escape(playlist_id),
2288 # Fetch new pages until all the videos are repeated, it seems that
2289 # there are always 51 unique videos.
2290 new_ids = [_id for _id in new_ids if _id not in ids]
2296 url_results = self._ids_to_results(ids)
2298 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2300 search_title('playlist-title') or
2301 search_title('title long-title') or
2302 search_title('title'))
2303 title = clean_html(title_span)
2305 return self.playlist_result(url_results, playlist_id, title)
2307 def _extract_playlist(self, playlist_id):
2308 url = self._TEMPLATE_URL % playlist_id
2309 page = self._download_webpage(url, playlist_id)
2311 # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
2312 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2313 match = match.strip()
2314 # Check if the playlist exists or is private
2315 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2317 reason = mobj.group('reason')
2318 message = 'This playlist %s' % reason
2319 if 'private' in reason:
2320 message += ', use --username or --netrc to access it'
2322 raise ExtractorError(message, expected=True)
2323 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2324 raise ExtractorError(
2325 'Invalid parameters. Maybe URL is incorrect.',
2327 elif re.match(r'[^<]*Choose your language[^<]*', match):
2330 self.report_warning('Youtube gives an alert message: ' + match)
2332 playlist_title = self._html_search_regex(
2333 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2334 page, 'title', default=None)
2336 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2337 uploader = self._search_regex(
2338 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2339 page, 'uploader', default=None)
2341 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2344 uploader_id = mobj.group('uploader_id')
2345 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2347 uploader_id = uploader_url = None
2351 if not playlist_title:
2353 # Some playlist URLs don't actually serve a playlist (e.g.
2354 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2355 next(self._entries(page, playlist_id))
2356 except StopIteration:
2359 playlist = self.playlist_result(
2360 self._entries(page, playlist_id), playlist_id, playlist_title)
2362 'uploader': uploader,
2363 'uploader_id': uploader_id,
2364 'uploader_url': uploader_url,
2367 return has_videos, playlist
2369 def _check_download_just_video(self, url, playlist_id):
2370 # Check if it's a video-specific URL
2371 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2372 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2373 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2374 'video id', default=None)
2376 if self._downloader.params.get('noplaylist'):
2377 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2378 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2380 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2381 return video_id, None
2384 def _real_extract(self, url):
2385 # Extract playlist id
2386 mobj = re.match(self._VALID_URL, url)
2388 raise ExtractorError('Invalid URL: %s' % url)
2389 playlist_id = mobj.group(1) or mobj.group(2)
2391 video_id, video = self._check_download_just_video(url, playlist_id)
2395 if playlist_id.startswith(('RD', 'UL', 'PU')):
2396 # Mixes require a custom extraction process
2397 return self._extract_mix(playlist_id)
2399 has_videos, playlist = self._extract_playlist(playlist_id)
2400 if has_videos or not video_id:
2403 # Some playlist URLs don't actually serve a playlist (see
2404 # https://github.com/rg3/youtube-dl/issues/10537).
2405 # Fallback to plain video extraction if there is a video id
2406 # along with playlist id.
2407 return self.url_result(video_id, 'Youtube', video_id=video_id)
2410 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2411 IE_DESC = 'YouTube.com channels'
2412 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
2413 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2414 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2415 IE_NAME = 'youtube:channel'
2417 'note': 'paginated channel',
2418 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2419 'playlist_mincount': 91,
2421 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2422 'title': 'Uploads from lex will',
2425 'note': 'Age restricted channel',
2426 # from https://www.youtube.com/user/DeusExOfficial
2427 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2428 'playlist_mincount': 64,
2430 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2431 'title': 'Uploads from Deus Ex',
def suitable(cls, url):
    """Return True only when no more specific YouTube channel-ish
    extractor (playlists tab, live page) claims this URL."""
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
2440 def _build_template_url(self, url, channel_id):
2441 return self._TEMPLATE_URL % channel_id
2443 def _real_extract(self, url):
2444 channel_id = self._match_id(url)
2446 url = self._build_template_url(url, channel_id)
2448 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2449 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2450 # otherwise fallback on channel by page extraction
2451 channel_page = self._download_webpage(
2452 url + '?view=57', channel_id,
2453 'Downloading channel page', fatal=False)
2454 if channel_page is False:
2455 channel_playlist_id = False
2457 channel_playlist_id = self._html_search_meta(
2458 'channelId', channel_page, 'channel id', default=None)
2459 if not channel_playlist_id:
2460 channel_url = self._html_search_meta(
2461 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2462 channel_page, 'channel url', default=None)
2464 channel_playlist_id = self._search_regex(
2465 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2466 channel_url, 'channel id', default=None)
2467 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2468 playlist_id = 'UU' + channel_playlist_id[2:]
2469 return self.url_result(
2470 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2472 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2473 autogenerated = re.search(r'''(?x
)
2475 channel
-header
-autogenerated
-label|
2476 yt
-channel
-title
-autogenerated
2477 )[^
"]*"''', channel_page) is not None
2480 # The videos are contained in a single page
2481 # the ajax pages can't be used, they are empty
2484 video_id, 'Youtube', video_id=video_id,
2485 video_title=video_title)
2486 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2487 return self.playlist_result(entries, channel_id)
2490 next(self._entries(channel_page, channel_id))
2491 except StopIteration:
2492 alert_message = self._html_search_regex(
2493 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2494 channel_page, 'alert', default=None, group='alert')
2496 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2498 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2501 class YoutubeUserIE(YoutubeChannelIE):
2502 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2503 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
2504 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2505 IE_NAME = 'youtube:user'
2508 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2509 'playlist_mincount': 320,
2511 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2512 'title': 'Uploads from The Linux Foundation',
2515 # Only available via https://www.youtube.com/c/12minuteathlete/videos
2516 # but not https://www.youtube.com/user/12minuteathlete/videos
2517 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2518 'playlist_mincount': 249,
2520 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2521 'title': 'Uploads from 12 Minute Athlete',
2524 'url': 'ytuser:phihag',
2525 'only_matching': True,
2527 'url': 'https://www.youtube.com/c/gametrailers',
2528 'only_matching': True,
2530 'url': 'https://www.youtube.com/gametrailers',
2531 'only_matching': True,
2533 # This channel is not available, geo restricted to JP
2534 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2535 'only_matching': True,
2539 def suitable(cls, url):
2540 # Don't return True if the url can be extracted with other youtube
2541 # extractor, the regex would is too permissive and it would match.
2542 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2543 if any(ie.suitable(url) for ie in other_yt_ies):
2546 return super(YoutubeUserIE, cls).suitable(url)
2548 def _build_template_url(self, url, channel_id):
2549 mobj = re.match(self._VALID_URL, url)
2550 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2553 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2554 IE_DESC = 'YouTube.com live streams'
2555 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2556 IE_NAME = 'youtube:live'
2559 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2561 'id': 'a48o2S1cPoo',
2563 'title': 'The Young Turks - Live Main Show',
2564 'uploader': 'The Young Turks',
2565 'uploader_id': 'TheYoungTurks',
2566 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2567 'upload_date': '20150715',
2568 'license': 'Standard YouTube License',
2569 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2570 'categories': ['News & Politics'],
2571 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2573 'dislike_count': int,
2576 'skip_download': True,
2579 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2580 'only_matching': True,
2582 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2583 'only_matching': True,
2585 'url': 'https://www.youtube.com/TheYoungTurks/live',
2586 'only_matching': True,
2589 def _real_extract(self, url):
2590 mobj = re.match(self._VALID_URL, url)
2591 channel_id = mobj.group('id')
2592 base_url = mobj.group('base_url')
2593 webpage = self._download_webpage(url, channel_id, fatal=False)
2595 page_type = self._og_search_property(
2596 'type', webpage, 'page type', default='')
2597 video_id = self._html_search_meta(
2598 'videoId', webpage, 'video id', default=None)
2599 if page_type.startswith('video') and video_id and re.match(
2600 r'^[0-9A-Za-z_-]{11}$', video_id):
2601 return self.url_result(video_id, YoutubeIE.ie_key())
2602 return self.url_result(base_url)
2605 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
2606 IE_DESC = 'YouTube.com user/channel playlists'
2607 _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
2608 IE_NAME = 'youtube:playlists'
2611 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2612 'playlist_mincount': 4,
2614 'id': 'ThirstForScience',
2615 'title': 'Thirst for Science',
2618 # with "Load more" button
2619 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2620 'playlist_mincount': 70,
2623 'title': 'ŠŠ³Š¾ŃŃ ŠŠ»ŠµŠ¹Š½ŠµŃ',
2626 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
2627 'playlist_mincount': 17,
2629 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
2630 'title': 'Chem Player',
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared scaffolding for the search extractors.

    Only contributes the regex used to pull video ids (and, when present,
    titles) out of a search results page.
    """
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2639 class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
2640 IE_DESC = 'YouTube.com searches'
2641 # there doesn't appear to be a real limit, for example if you search for
2642 # 'python' you get more than 8.000.000 results
2643 _MAX_RESULTS = float('inf')
2644 IE_NAME = 'youtube:search'
2645 _SEARCH_KEY = 'ytsearch'
2646 _EXTRA_QUERY_ARGS = {}
2649 def _get_n_results(self, query, n):
2650 """Get a specified number of results for a query"""
2656 'search_query': query.encode('utf-8'),
2658 url_query.update(self._EXTRA_QUERY_ARGS)
2659 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
2661 for pagenum in itertools.count(1):
2662 data = self._download_json(
2663 result_url, video_id='query "%s"' % query,
2664 note='Downloading page %s' % pagenum,
2665 errnote='Unable to download API page',
2666 query={'spf': 'navigate'})
2667 html_content = data[1]['body']['content']
2669 if 'class="search-message' in html_content:
2670 raise ExtractorError(
2671 '[youtube] No video results', expected=True)
2673 new_videos = list(self._process_page(html_content))
2674 videos += new_videos
2675 if not new_videos or len(videos) > limit:
2677 next_link = self._html_search_regex(
2678 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
2679 html_content, 'next link', default=None)
2680 if next_link is None:
2682 result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
2686 return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor that orders results by upload date, newest first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Extra query parameter that flips the ordering on the results page.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
2696 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
2697 IE_DESC = 'YouTube.com search URLs'
2698 IE_NAME = 'youtube:search_url'
2699 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
2701 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
2702 'playlist_mincount': 5,
2704 'title': 'youtube-dl test video',
2707 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
2708 'only_matching': True,
def _real_extract(self, url):
    """Download a search results URL and return its videos as a playlist."""
    match = re.match(self._VALID_URL, url)
    search_query = compat_urllib_parse_unquote_plus(match.group('query'))
    page = self._download_webpage(url, search_query)
    return self.playlist_result(
        self._process_page(page), playlist_title=search_query)
2718 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
2719 IE_DESC = 'YouTube.com (multi-season) shows'
2720 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
2721 IE_NAME = 'youtube:show'
2723 'url': 'https://www.youtube.com/show/airdisasters',
2724 'playlist_mincount': 5,
2726 'id': 'airdisasters',
2727 'title': 'Air Disasters',
def _real_extract(self, url):
    # A show is just its playlists page; rewrite the URL and let the
    # playlists base extractor do the actual work.
    show_id = self._match_id(url)
    playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
    return super(YoutubeShowIE, self)._real_extract(playlists_url)
2737 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
2739 Base class for feed extractors
2740 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
2742 _LOGIN_REQUIRED = True
2746 return 'youtube:%s' % self._FEED_NAME
2748 def _real_initialize(self):
2751 def _entries(self, page):
2752 # The extraction process is the same as for playlists, but the regex
2753 # for the video ids doesn't contain an index
2755 more_widget_html = content_html = page
2756 for page_num in itertools.count(1):
2757 matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
2759 # 'recommended' feed has infinite 'load more' and each new portion spins
2760 # the same videos in (sometimes) slightly different order, so we'll check
2761 # for unicity and break when portion has no new videos
2762 new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
2768 for entry in self._ids_to_results(new_ids):
2771 mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
2775 more = self._download_json(
2776 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
2777 'Downloading page #%s' % page_num,
2778 transform_source=uppercase_escape)
2779 content_html = more['content_html']
2780 more_widget_html = more['load_more_widget_html']
def _real_extract(self, url):
    # Every feed lives under /feed/<name>; the concrete subclass supplies
    # _FEED_NAME and a human-readable _PLAYLIST_TITLE.
    feed_page = self._download_webpage(
        'https://www.youtube.com/feed/%s' % self._FEED_NAME,
        self._PLAYLIST_TITLE)
    return self.playlist_result(
        self._entries(feed_page), playlist_title=self._PLAYLIST_TITLE)
2790 class YoutubeWatchLaterIE(YoutubePlaylistIE):
2791 IE_NAME = 'youtube:watchlater'
2792 IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
2793 _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
2796 'url': 'https://www.youtube.com/playlist?list=WL',
2797 'only_matching': True,
2799 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
2800 'only_matching': True,
2803 def _real_extract(self, url):
2804 _, video = self._check_download_just_video(url, 'WL')
2807 _, playlist = self._extract_playlist('WL')
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites list (":ytfav")."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of its backing playlist; hand
        # the rest of the job over to the playlist extractor.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" page (":ytrec")."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the subscriptions page ("ytsubs")."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the watch-history page (":ythistory")."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2844 class YoutubeTruncatedURLIE(InfoExtractor):
2845 IE_NAME = 'youtube:truncated_url'
2846 IE_DESC = False # Do not list
2847 _VALID_URL = r'''(?x
)
2849 (?
:\w
+\
.)?
[yY
][oO
][uU
][tT
][uU
][bB
][eE
](?
:-nocookie
)?\
.com
/
2852 annotation_id
=annotation_
[^
&]+|
2858 attribution_link
\?a
=[^
&]+
2864 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
2865 'only_matching': True,
2867 'url': 'https://www.youtube.com/watch?',
2868 'only_matching': True,
2870 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
2871 'only_matching': True,
2873 'url': 'https://www.youtube.com/watch?feature=foo',
2874 'only_matching': True,
2876 'url': 'https://www.youtube.com/watch?hl=en-GB',
2877 'only_matching': True,
2879 'url': 'https://www.youtube.com/watch?t=2372',
2880 'only_matching': True,
2883 def _real_extract(self, url):
2884 raise ExtractorError(
2885 'Did you forget to quote the URL? Remember that & is a meta '
2886 'character in most shells, so you want to put the URL in quotes, '
2888 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
2889 ' or simply youtube-dl BaW_jenozKc .',
2893 class YoutubeTruncatedIDIE(InfoExtractor):
2894 IE_NAME = 'youtube:truncated_id'
2895 IE_DESC = False # Do not list
2896 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
2899 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
2900 'only_matching': True,
2903 def _real_extract(self, url):
2904 video_id = self._match_id(url)
2905 raise ExtractorError(
2906 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),