3 from __future__
import unicode_literals
14 from .common
import InfoExtractor
, SearchInfoExtractor
15 from ..jsinterp
import JSInterpreter
16 from ..swfinterp
import SWFInterpreter
17 from ..compat
import (
21 compat_urllib_parse_unquote
,
22 compat_urllib_parse_unquote_plus
,
23 compat_urllib_parse_urlencode
,
24 compat_urllib_parse_urlparse
,
33 get_element_by_attribute
,
54 class YoutubeBaseInfoExtractor(InfoExtractor
):
55 """Provide base functions for Youtube extractors"""
56 _LOGIN_URL
= 'https://accounts.google.com/ServiceLogin'
57 _TWOFACTOR_URL
= 'https://accounts.google.com/signin/challenge'
59 _LOOKUP_URL
= 'https://accounts.google.com/_/signin/sl/lookup'
60 _CHALLENGE_URL
= 'https://accounts.google.com/_/signin/sl/challenge'
61 _TFA_URL
= 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
63 _NETRC_MACHINE
= 'youtube'
64 # If True it will raise an error if no login info is provided
65 _LOGIN_REQUIRED
= False
67 _PLAYLIST_ID_RE
= r
'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
69 def _set_language(self
):
71 '.youtube.com', 'PREF', 'f1=50000000&hl=en',
72 # YouTube sets the expire time to about two months
73 expire_time
=time
.time() + 2 * 30 * 24 * 3600)
75 def _ids_to_results(self
, ids
):
77 self
.url_result(vid_id
, 'Youtube', video_id
=vid_id
)
82 Attempt to log in to YouTube.
83 True is returned if successful or skipped.
84 False is returned if login failed.
86 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
88 username
, password
= self
._get
_login
_info
()
89 # No authentication to be performed
91 if self
._LOGIN
_REQUIRED
and self
._downloader
.params
.get('cookiefile') is None:
92 raise ExtractorError('No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True)
95 login_page
= self
._download
_webpage
(
96 self
._LOGIN
_URL
, None,
97 note
='Downloading login page',
98 errnote
='unable to fetch login page', fatal
=False)
99 if login_page
is False:
102 login_form
= self
._hidden
_inputs
(login_page
)
104 def req(url
, f_req
, note
, errnote
):
105 data
= login_form
.copy()
108 'checkConnection': 'youtube',
109 'checkedDomains': 'youtube',
111 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
112 'f.req': json
.dumps(f_req
),
113 'flowName': 'GlifWebSignIn',
114 'flowEntry': 'ServiceLogin',
116 return self
._download
_json
(
117 url
, None, note
=note
, errnote
=errnote
,
118 transform_source
=lambda s
: re
.sub(r
'^[^[]*', '', s
),
120 data
=urlencode_postdata(data
), headers
={
121 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
122 'Google-Accounts-XSRF': 1,
126 self
._downloader
.report_warning(message
)
130 None, [], None, 'US', None, None, 2, False, True,
134 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
136 1, [None, None, []], None, None, None, True
141 lookup_results
= req(
142 self
._LOOKUP
_URL
, lookup_req
,
143 'Looking up account info', 'Unable to look up account info')
145 if lookup_results
is False:
148 user_hash
= try_get(lookup_results
, lambda x
: x
[0][2], compat_str
)
150 warn('Unable to extract user hash')
155 None, 1, None, [1, None, None, None, [password
, None, True]],
157 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
158 1, [None, None, []], None, None, None, True
161 challenge_results
= req(
162 self
._CHALLENGE
_URL
, challenge_req
,
163 'Logging in', 'Unable to log in')
165 if challenge_results
is False:
168 login_res
= try_get(challenge_results
, lambda x
: x
[0][5], list)
170 login_msg
= try_get(login_res
, lambda x
: x
[5], compat_str
)
172 'Unable to login: %s' % 'Invalid password'
173 if login_msg
== 'INCORRECT_ANSWER_ENTERED' else login_msg
)
176 res
= try_get(challenge_results
, lambda x
: x
[0][-1], list)
178 warn('Unable to extract result entry')
181 tfa
= try_get(res
, lambda x
: x
[0][0], list)
183 tfa_str
= try_get(tfa
, lambda x
: x
[2], compat_str
)
184 if tfa_str
== 'TWO_STEP_VERIFICATION':
185 # SEND_SUCCESS - TFA code has been successfully sent to phone
186 # QUOTA_EXCEEDED - reached the limit of TFA codes
187 status
= try_get(tfa
, lambda x
: x
[5], compat_str
)
188 if status
== 'QUOTA_EXCEEDED':
189 warn('Exceeded the limit of TFA codes, try later')
192 tl
= try_get(challenge_results
, lambda x
: x
[1][2], compat_str
)
194 warn('Unable to extract TL')
197 tfa_code
= self
._get
_tfa
_info
('2-step verification code')
201 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
202 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
205 tfa_code
= remove_start(tfa_code
, 'G-')
208 user_hash
, None, 2, None,
210 9, None, None, None, None, None, None, None,
211 [None, tfa_code
, True, 2]
215 self
._TFA
_URL
.format(tl
), tfa_req
,
216 'Submitting TFA code', 'Unable to submit TFA code')
218 if tfa_results
is False:
221 tfa_res
= try_get(tfa_results
, lambda x
: x
[0][5], list)
223 tfa_msg
= try_get(tfa_res
, lambda x
: x
[5], compat_str
)
225 'Unable to finish TFA: %s' % 'Invalid TFA code'
226 if tfa_msg
== 'INCORRECT_ANSWER_ENTERED' else tfa_msg
)
229 check_cookie_url
= try_get(
230 tfa_results
, lambda x
: x
[0][-1][2], compat_str
)
232 check_cookie_url
= try_get(res
, lambda x
: x
[2], compat_str
)
234 if not check_cookie_url
:
235 warn('Unable to extract CheckCookie URL')
238 check_cookie_results
= self
._download
_webpage
(
239 check_cookie_url
, None, 'Checking cookie', fatal
=False)
241 if check_cookie_results
is False:
244 if 'https://myaccount.google.com/' not in check_cookie_results
:
245 warn('Unable to log in')
def _download_webpage_handle(self, *args, **kwargs):
    """Download a webpage, always sending disable_polymer=true.

    NOTE(review): presumably this forces YouTube to serve the legacy
    (pre-polymer) markup that the scraping regexes expect — confirm.
    """
    query = kwargs.setdefault('query', {})
    query['disable_polymer'] = 'true'
    return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
        *args, **compat_kwargs(kwargs))
255 def _real_initialize(self
):
256 if self
._downloader
is None:
259 if not self
._login
():
263 class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor
):
264 # Extract entries from page with "Load more" button
265 def _entries(self
, page
, playlist_id
):
266 more_widget_html
= content_html
= page
267 for page_num
in itertools
.count(1):
268 for entry
in self
._process
_page
(content_html
):
271 mobj
= re
.search(r
'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html
)
275 more
= self
._download
_json
(
276 'https://youtube.com/%s' % mobj
.group('more'), playlist_id
,
277 'Downloading page #%s' % page_num
,
278 transform_source
=uppercase_escape
)
279 content_html
= more
['content_html']
280 if not content_html
.strip():
281 # Some webpages show a "Load more" button but they don't
284 more_widget_html
= more
['load_more_widget_html']
287 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor
):
def _process_page(self, content):
    """Yield a Youtube url_result entry for each video found in *content*."""
    for vid, vid_title in self.extract_videos_from_page(content):
        yield self.url_result(vid, 'Youtube', vid, vid_title)
292 def extract_videos_from_page(self
, page
):
295 for mobj
in re
.finditer(self
._VIDEO
_RE
, page
):
296 # The link with index 0 is not the first video of the playlist (not sure if still actual)
297 if 'index' in mobj
.groupdict() and mobj
.group('id') == '0':
299 video_id
= mobj
.group('id')
300 video_title
= unescapeHTML(mobj
.group('title'))
302 video_title
= video_title
.strip()
304 idx
= ids_in_page
.index(video_id
)
305 if video_title
and not titles_in_page
[idx
]:
306 titles_in_page
[idx
] = video_title
308 ids_in_page
.append(video_id
)
309 titles_in_page
.append(video_title
)
310 return zip(ids_in_page
, titles_in_page
)
313 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor
):
314 def _process_page(self
, content
):
315 for playlist_id
in orderedSet(re
.findall(
316 r
'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
318 yield self
.url_result(
319 'https://www.youtube.com/playlist?list=%s' % playlist_id
, 'YoutubePlaylist')
def _real_extract(self, url):
    """Extract a playlists overview page as a playlist result."""
    list_id = self._match_id(url)
    page = self._download_webpage(url, list_id)
    # Title is best effort: a missing og:title must not abort extraction.
    page_title = self._og_search_title(page, fatal=False)
    entries = self._entries(page, list_id)
    return self.playlist_result(entries, list_id, page_title)
328 class YoutubeIE(YoutubeBaseInfoExtractor
):
329 IE_DESC
= 'YouTube.com'
330 _VALID_URL
= r
"""(?x)^
332 (?:https?://|//) # http(s):// or protocol-independent URL
333 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
334 (?:www\.)?deturl\.com/www\.youtube\.com/|
335 (?:www\.)?pwnyoutube\.com/|
336 (?:www\.)?hooktube\.com/|
337 (?:www\.)?yourepeat\.com/|
338 tube\.majestyc\.net/|
339 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
340 (?:.*?\#/)? # handle anchor (#/) redirect urls
341 (?: # the various things that can precede the ID:
342 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
343 |(?: # or the v= param in all its forms
344 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
345 (?:\?|\#!?) # the params delimiter ? or # or #!
346 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
351 youtu\.be| # just youtu.be/xxxx
352 vid\.plus| # or vid.plus/xxxx
353 zwearz\.com/watch| # or zwearz.com/watch/xxxx
355 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
357 )? # all until now is optional -> you can pass the naked ID
358 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
361 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
362 WL # WL are handled by the watch later IE
365 (?(1).+)? # if we found the ID, everything can follow
366 $""" % {'playlist_id': YoutubeBaseInfoExtractor
._PLAYLIST
_ID
_RE
}
367 _NEXT_URL_RE
= r
'[\?&]next_url=([^&]+)'
369 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
370 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
371 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
372 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
373 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
374 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
375 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
376 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
377 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
378 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
379 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
380 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
381 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
382 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
383 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
384 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
385 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
386 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
390 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
391 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
392 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
393 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
394 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
395 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
396 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
398 # Apple HTTP Live Streaming
399 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
400 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
401 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
402 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
403 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
404 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
405 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
406 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
409 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
410 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
411 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
412 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
413 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
414 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
415 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
416 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
417 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
418 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
419 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
420 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
423 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
424 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
425 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
426 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
427 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
428 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
429 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
432 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
433 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
434 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
435 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
436 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
437 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
438 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
439 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
440 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
441 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
442 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
443 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
444 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
445 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
446 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
447 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
448 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
449 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
450 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
451 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
452 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
453 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
456 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
457 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
459 # Dash webm audio with opus inside
460 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
461 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
462 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
465 '_rtmp': {'protocol': 'rtmp'},
467 _SUBTITLE_FORMATS
= ('ttml', 'vtt')
474 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
478 'title': 'youtube-dl test video "\'/\\Ƥāš',
479 'uploader': 'Philipp Hagemeister',
480 'uploader_id': 'phihag',
481 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/phihag',
482 'upload_date': '20121002',
483 'license': 'Standard YouTube License',
484 'description': 'test chars: "\'/\\Ƥāš\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
485 'categories': ['Science & Technology'],
486 'tags': ['youtube-dl'],
489 'dislike_count': int,
495 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
496 'note': 'Test generic use_cipher_signature video (#897)',
500 'upload_date': '20120506',
501 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
502 'alt_title': 'I Love It (feat. Charli XCX)',
503 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
504 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
505 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
506 'iconic ep', 'iconic', 'love', 'it'],
508 'uploader': 'Icona Pop',
509 'uploader_id': 'IconaPop',
510 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/IconaPop',
511 'license': 'Standard YouTube License',
512 'creator': 'Icona Pop',
513 'track': 'I Love It (feat. Charli XCX)',
514 'artist': 'Icona Pop',
518 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
519 'note': 'Test VEVO video with age protection (#956)',
523 'upload_date': '20130703',
524 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
525 'alt_title': 'Tunnel Vision',
526 'description': 'md5:64249768eec3bc4276236606ea996373',
528 'uploader': 'justintimberlakeVEVO',
529 'uploader_id': 'justintimberlakeVEVO',
530 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
531 'license': 'Standard YouTube License',
532 'creator': 'Justin Timberlake',
533 'track': 'Tunnel Vision',
534 'artist': 'Justin Timberlake',
539 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
540 'note': 'Embed-only video (#1746)',
544 'upload_date': '20120608',
545 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
546 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
547 'uploader': 'SET India',
548 'uploader_id': 'setindia',
549 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/setindia',
550 'license': 'Standard YouTube License',
555 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
556 'note': 'Use the first video ID in the URL',
560 'title': 'youtube-dl test video "\'/\\Ƥāš',
561 'uploader': 'Philipp Hagemeister',
562 'uploader_id': 'phihag',
563 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/phihag',
564 'upload_date': '20121002',
565 'license': 'Standard YouTube License',
566 'description': 'test chars: "\'/\\Ƥāš\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
567 'categories': ['Science & Technology'],
568 'tags': ['youtube-dl'],
571 'dislike_count': int,
574 'skip_download': True,
578 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
579 'note': '256k DASH audio (format 141) via DASH manifest',
583 'upload_date': '20121002',
584 'uploader_id': '8KVIDEO',
585 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
587 'uploader': '8KVIDEO',
588 'license': 'Standard YouTube License',
589 'title': 'UHDTV TEST 8K VIDEO.mp4'
592 'youtube_include_dash_manifest': True,
595 'skip': 'format 141 not served anymore',
597 # DASH manifest with encrypted signature
599 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
603 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
604 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
606 'uploader': 'AfrojackVEVO',
607 'uploader_id': 'AfrojackVEVO',
608 'upload_date': '20131011',
609 'license': 'Standard YouTube License',
612 'youtube_include_dash_manifest': True,
613 'format': '141/bestaudio[ext=m4a]',
616 # JS player signature function name containing $
618 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
622 'title': 'Taylor Swift - Shake It Off',
623 'alt_title': 'Shake It Off',
624 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
626 'uploader': 'TaylorSwiftVEVO',
627 'uploader_id': 'TaylorSwiftVEVO',
628 'upload_date': '20140818',
629 'license': 'Standard YouTube License',
630 'creator': 'Taylor Swift',
633 'youtube_include_dash_manifest': True,
634 'format': '141/bestaudio[ext=m4a]',
639 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
644 'upload_date': '20100909',
645 'uploader': 'TJ Kirk',
646 'uploader_id': 'TheAmazingAtheist',
647 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
648 'license': 'Standard YouTube License',
649 'title': 'Burning Everyone\'s Koran',
650 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
653 # Normal age-gate video (No vevo, embed allowed)
655 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
659 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
660 'description': r
're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
662 'uploader': 'The Witcher',
663 'uploader_id': 'WitcherGame',
664 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
665 'upload_date': '20140605',
666 'license': 'Standard YouTube License',
670 # Age-gate video with encrypted signature
672 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
676 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
677 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
679 'uploader': 'LloydVEVO',
680 'uploader_id': 'LloydVEVO',
681 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
682 'upload_date': '20110629',
683 'license': 'Standard YouTube License',
687 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
688 # YouTube Red ad is not captured for creator
690 'url': '__2ABJjxzNo',
695 'upload_date': '20100430',
696 'uploader_id': 'deadmau5',
697 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/deadmau5',
698 'creator': 'deadmau5',
699 'description': 'md5:12c56784b8032162bb936a5f76d55360',
700 'uploader': 'deadmau5',
701 'license': 'Standard YouTube License',
702 'title': 'Deadmau5 - Some Chords (HD)',
703 'alt_title': 'Some Chords',
705 'expected_warnings': [
706 'DASH manifest missing',
709 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
711 'url': 'lqQg6PlCWgI',
716 'upload_date': '20150827',
717 'uploader_id': 'olympic',
718 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/olympic',
719 'license': 'Standard YouTube License',
720 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
721 'uploader': 'Olympic',
722 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
725 'skip_download': 'requires avconv',
730 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
734 'stretched_ratio': 16 / 9.,
736 'upload_date': '20110310',
737 'uploader_id': 'AllenMeow',
738 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
739 'description': 'made by Wacom from Korea | åå¹&å ę²¹ę·»é by TY\'s Allen | ęč¬heylisa00cavey1001ååøē±ę
ęä¾ę¢åēæ»čÆ',
740 'uploader': 'å«įį
',
741 'license': 'Standard YouTube License',
742 'title': '[A-made] č®ę
å¦åå¹ē å¤Ŗå¦ ęå°±ęÆéęØ£ēäŗŗ',
745 # url_encoded_fmt_stream_map is empty string
747 'url': 'qEJwOuvDf7I',
751 'title': 'ŠŠ±ŃŃŠ¶Š“ŠµŠ½ŠøŠµ ŃŃŠ“ŠµŠ±Š½Š¾Š¹ ŠæŃŠ°ŠŗŃŠøŠŗŠø ŠæŠ¾ Š²ŃŠ±Š¾ŃŠ°Š¼ 14 ŃŠµŠ½ŃŃŠ±ŃŃ 2014 Š³Š¾Š“Š° Š² Š”Š°Š½ŠŗŃ-ŠŠµŃŠµŃŠ±ŃŃŠ³Šµ',
753 'upload_date': '20150404',
754 'uploader_id': 'spbelect',
755 'uploader': 'ŠŠ°Š±Š»ŃŠ“Š°ŃŠµŠ»Šø ŠŠµŃŠµŃŠ±ŃŃŠ³Š°',
758 'skip_download': 'requires avconv',
760 'skip': 'This live event has ended.',
762 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
764 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
768 'title': 'md5:7b81415841e02ecd4313668cde88737a',
769 'description': 'md5:116377fd2963b81ec4ce64b542173306',
771 'upload_date': '20150625',
772 'uploader_id': 'dorappi2000',
773 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
774 'uploader': 'dorappi2000',
775 'license': 'Standard YouTube License',
776 'formats': 'mincount:31',
778 'skip': 'not actual anymore',
780 # DASH manifest with segment_list
782 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
783 'md5': '8ce563a1d667b599d21064e982ab9e31',
787 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
788 'uploader': 'Airtek',
789 'description': 'RetransmisiĆ³n en directo de la XVIII media maratĆ³n de Zaragoza.',
790 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
791 'license': 'Standard YouTube License',
792 'title': 'RetransmisiĆ³n XVIII Media maratĆ³n Zaragoza 2015',
795 'youtube_include_dash_manifest': True,
796 'format': '135', # bestvideo
798 'skip': 'This live event has ended.',
801 # Multifeed videos (multiple cameras), URL is for Main Camera
802 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
805 'title': 'teamPGP: Rocket League Noob Stream',
806 'description': 'md5:dc7872fb300e143831327f1bae3af010',
812 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
813 'description': 'md5:dc7872fb300e143831327f1bae3af010',
815 'upload_date': '20150721',
816 'uploader': 'Beer Games Beer',
817 'uploader_id': 'beergamesbeer',
818 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
819 'license': 'Standard YouTube License',
825 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
826 'description': 'md5:dc7872fb300e143831327f1bae3af010',
828 'upload_date': '20150721',
829 'uploader': 'Beer Games Beer',
830 'uploader_id': 'beergamesbeer',
831 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
832 'license': 'Standard YouTube License',
838 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
839 'description': 'md5:dc7872fb300e143831327f1bae3af010',
841 'upload_date': '20150721',
842 'uploader': 'Beer Games Beer',
843 'uploader_id': 'beergamesbeer',
844 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
845 'license': 'Standard YouTube License',
851 'title': 'teamPGP: Rocket League Noob Stream (zim)',
852 'description': 'md5:dc7872fb300e143831327f1bae3af010',
854 'upload_date': '20150721',
855 'uploader': 'Beer Games Beer',
856 'uploader_id': 'beergamesbeer',
857 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
858 'license': 'Standard YouTube License',
862 'skip_download': True,
866 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
867 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
870 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
873 'skip': 'Not multifeed anymore',
876 'url': 'https://vid.plus/FlRa-iH7PGw',
877 'only_matching': True,
880 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
881 'only_matching': True,
884 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
885 # Also tests cut-off URL expansion in video description (see
886 # https://github.com/rg3/youtube-dl/issues/1892,
887 # https://github.com/rg3/youtube-dl/issues/8164)
888 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
892 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
893 'alt_title': 'Dark Walk - Position Music',
894 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
896 'upload_date': '20151119',
897 'uploader_id': 'IronSoulElf',
898 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
899 'uploader': 'IronSoulElf',
900 'license': 'Standard YouTube License',
901 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
902 'track': 'Dark Walk - Position Music',
903 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
906 'skip_download': True,
910 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
911 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
912 'only_matching': True,
915 # Video with yt:stretch=17:0
916 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
920 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
921 'description': 'md5:ee18a25c350637c8faff806845bddee9',
922 'upload_date': '20151107',
923 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
924 'uploader': 'CH GAMER DROID',
927 'skip_download': True,
929 'skip': 'This video does not exist.',
932 # Video licensed under Creative Commons
933 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
937 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
938 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
940 'upload_date': '20150127',
941 'uploader_id': 'BerkmanCenter',
942 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
943 'uploader': 'The Berkman Klein Center for Internet & Society',
944 'license': 'Creative Commons Attribution license (reuse allowed)',
947 'skip_download': True,
951 # Channel-like uploader_url
952 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
956 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
957 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
959 'upload_date': '20151119',
960 'uploader': 'Bernie Sanders',
961 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
962 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
963 'license': 'Creative Commons Attribution license (reuse allowed)',
966 'skip_download': True,
970 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
971 'only_matching': True,
974 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
975 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
976 'only_matching': True,
979 # Rental video preview
980 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
984 'title': 'Piku - Trailer',
985 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
986 'upload_date': '20150811',
987 'uploader': 'FlixMatrix',
988 'uploader_id': 'FlixMatrixKaravan',
989 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
990 'license': 'Standard YouTube License',
993 'skip_download': True,
995 'skip': 'This video is not available.',
998 # YouTube Red video with episode data
999 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1001 'id': 'iqKdEhx-dD4',
1003 'title': 'Isolation - Mind Field (Ep 1)',
1004 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
1006 'upload_date': '20170118',
1007 'uploader': 'Vsauce',
1008 'uploader_id': 'Vsauce',
1009 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1010 'license': 'Standard YouTube License',
1011 'series': 'Mind Field',
1013 'episode_number': 1,
1016 'skip_download': True,
1018 'expected_warnings': [
1019 'Skipping DASH manifest',
1023 # The following content has been identified by the YouTube community
1024 # as inappropriate or offensive to some audiences.
1025 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1027 'id': '6SJNVb0GnPI',
1029 'title': 'Race Differences in Intelligence',
1030 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1032 'upload_date': '20140124',
1033 'uploader': 'New Century Foundation',
1034 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1035 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1036 'license': 'Standard YouTube License',
1039 'skip_download': True,
1044 'url': '1t24XAntNCY',
1045 'only_matching': True,
1048 # geo restricted to JP
1049 'url': 'sJL6WA-aGkQ',
1050 'only_matching': True,
1053 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1054 'only_matching': True,
def __init__(self, *args, **kwargs):
    """Initialize the extractor and its per-player signature cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature-length id) -> cached decryption function.
    self._player_cache = dict()
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage is being downloaded."""
    notice = '%s: Downloading video info webpage' % video_id
    self.to_screen(notice)
def report_information_extraction(self, video_id):
    """Announce that video information is being extracted."""
    notice = '%s: Extracting video information' % video_id
    self.to_screen(notice)
def report_unavailable_format(self, video_id, format):
    """Warn that the requested format is not available for this video."""
    notice = '%s: Format %s not available' % (video_id, format)
    self.to_screen(notice)
def report_rtmp_download(self):
    """Note that this download will be served over the RTMP protocol."""
    self.to_screen('RTMP download detected')
def _signature_cache_id(self, example_sig):
    """Return a dotted string of the lengths of the signature's parts."""
    lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(lengths)
# Build (and persist to the on-disk cache) the signature-decryption function
# for a given player URL. NOTE(review): several original lines are missing
# from this chunk (e.g. the re.match call and download notes); the visible
# tokens are preserved verbatim.
1082 def _extract_signature_function(self
, video_id
, player_url
, example_sig
):
# Regex classifying the player: captures its id and extension ('js' or 'swf').
1084 r
'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1087 raise ExtractorError('Cannot identify player %r' % player_url
)
1088 player_type
= id_m
.group('ext')
1089 player_id
= id_m
.group('id')
1091 # Read from filesystem cache
# Cache key combines player type/id with the signature-length fingerprint.
1092 func_id
= '%s_%s_%s' % (
1093 player_type
, player_id
, self
._signature
_cache
_id
(example_sig
))
# Guard: func_id is used as a cache filename, so it must not contain path
# separators.
1094 assert os
.path
.basename(func_id
) == func_id
1096 cache_spec
= self
._downloader
.cache
.load('youtube-sigfuncs', func_id
)
# Cache hit: the spec is a list of source indices; the function is a simple
# character permutation/selection.
1097 if cache_spec
is not None:
1098 return lambda s
: ''.join(s
[i
] for i
in cache_spec
)
1101 'Downloading player %s' % player_url
1102 if self
._downloader
.params
.get('verbose') else
1103 'Downloading %s player %s' % (player_type
, player_id
)
# Parse the player with the matching interpreter (JS or SWF).
1105 if player_type
== 'js':
1106 code
= self
._download
_webpage
(
1107 player_url
, video_id
,
1109 errnote
='Download of %s failed' % player_url
)
1110 res
= self
._parse
_sig
_js
(code
)
1111 elif player_type
== 'swf':
1112 urlh
= self
._request
_webpage
(
1113 player_url
, video_id
,
1115 errnote
='Download of %s failed' % player_url
)
1117 res
= self
._parse
_sig
_swf
(code
)
1119 assert False, 'Invalid player type %r' % player_type
# Probe the function with a known string to recover the index permutation,
# then store that spec for future runs.
1121 test_string
= ''.join(map(compat_chr
, range(len(example_sig
))))
1122 cache_res
= res(test_string
)
1123 cache_spec
= [ord(c
) for c
in cache_res
]
1125 self
._downloader
.cache
.store('youtube-sigfuncs', func_id
, cache_spec
)
# Debug helper: probe the signature function and print equivalent Python
# slicing code (used with --youtube-print-sig-code). NOTE(review): some
# original lines are missing from this chunk; visible tokens kept verbatim.
1128 def _print_sig_code(self
, func
, example_sig
):
# Yields compact slice/index expressions reproducing the index sequence.
1129 def gen_sig_code(idxs
):
1130 def _genslice(start
, end
, step
):
1131 starts
= '' if start
== 0 else str(start
)
1132 ends
= (':%d' % (end
+ step
)) if end
+ step
>= 0 else ':'
1133 steps
= '' if step
== 1 else (':%d' % step
)
1134 return 's[%s%s%s]' % (starts
, ends
, steps
)
1137 # Quelch pyflakes warnings - start will be set when step is set
1138 start
= '(Never used)'
# Walk consecutive index pairs, emitting a slice whenever the stride breaks.
1139 for i
, prev
in zip(idxs
[1:], idxs
[:-1]):
1140 if step
is not None:
1141 if i
- prev
== step
:
1143 yield _genslice(start
, prev
, step
)
1146 if i
- prev
in [-1, 1]:
1151 yield 's[%d]' % prev
1155 yield _genslice(start
, i
, step
)
# Recover the index permutation by running func over a known string.
1157 test_string
= ''.join(map(compat_chr
, range(len(example_sig
))))
1158 cache_res
= func(test_string
)
1159 cache_spec
= [ord(c
) for c
in cache_res
]
1160 expr_code
= ' + '.join(gen_sig_code(cache_spec
))
1161 signature_id_tuple
= '(%s)' % (
1162 ', '.join(compat_str(len(p
)) for p
in example_sig
.split('.')))
1163 code
= ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1164 '    return %s\n') % (signature_id_tuple
, expr_code
)
1165 self
.to_screen('Extracted signature function:\n' + code
)
def _parse_sig_js(self, jscode):
    """Locate the signature function in JS player code and compile it.

    Returns a callable that maps an encrypted signature string to the
    deciphered one.
    """
    sig_func_name = self._search_regex(
        (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    decrypt = interpreter.extract_function(sig_func_name)

    def apply_sig(s):
        return decrypt([s])
    return apply_sig
def _parse_sig_swf(self, file_contents):
    """Compile the signature decipher routine from an SWF (Flash) player.

    Returns a callable that maps an encrypted signature string to the
    deciphered one.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def apply_sig(s):
        return decipher([s])
    return apply_sig
# NOTE(review): this chunk is missing a few original lines (notably the
# `try:` opener implied by the `except` below and the final `return func(s)`);
# visible tokens are preserved verbatim.
1184 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1185 """Turn the encrypted s field into a working signature"""
1187 if player_url is None:
1188 raise ExtractorError('Cannot decrypt signature without player_url
')
# Normalize scheme-relative and relative player URLs to absolute https URLs.
1190 if player_url.startswith('//'):
1191 player_url = 'https
:' + player_url
1192 elif not re.match(r'https?
://', player_url):
1193 player_url = compat_urlparse.urljoin(
1194 'https
://www
.youtube
.com
', player_url)
# Memoize the extracted function per (player URL, signature fingerprint).
1196 player_id = (player_url, self._signature_cache_id(s))
1197 if player_id not in self._player_cache:
1198 func = self._extract_signature_function(
1199 video_id, player_url, s
1201 self._player_cache[player_id] = func
1202 func = self._player_cache[player_id]
1203 if self._downloader.params.get('youtube_print_sig_code
'):
1204 self._print_sig_code(func, s)
# Any failure in extraction/interpretation is surfaced with a traceback.
1206 except Exception as e:
1207 tb = traceback.format_exc()
1208 raise ExtractorError(
1209 'Signature extraction failed
: ' + tb, cause=e)
# Fetch the list of manually-created subtitle tracks via the legacy timedtext
# list API and build a {lang: [format dicts]} mapping. NOTE(review): several
# original lines (try opener, returns, parts of the dict literals) are missing
# from this chunk; visible tokens are preserved verbatim.
1211 def _get_subtitles(self, video_id, webpage):
1213 subs_doc = self._download_xml(
1214 'https
://video
.google
.com
/timedtext?hl
=en
&type=list&v
=%s' % video_id,
1215 video_id, note=False)
# Subtitle download failures are non-fatal: warn and continue.
1216 except ExtractorError as err:
1217 self._downloader.report_warning('unable to download video subtitles
: %s' % error_to_compat_str(err))
# One <track> element per available subtitle language.
1221 for track in subs_doc.findall('track
'):
1222 lang = track.attrib['lang_code
']
1223 if lang in sub_lang_list:
1226 for ext in self._SUBTITLE_FORMATS:
1227 params = compat_urllib_parse_urlencode({
1231 'name
': track.attrib['name
'].encode('utf
-8'),
1233 sub_formats.append({
1234 'url
': 'https
://www
.youtube
.com
/api
/timedtext?
' + params,
1237 sub_lang_list[lang] = sub_formats
1238 if not sub_lang_list:
1239 self._downloader.report_warning('video doesn
\'t have subtitles
')
1241 return sub_lang_list
# Extract the ytplayer.config JSON object from the watch page; returns the
# parsed dict or None. NOTE(review): lines declaring the `patterns` tuple
# opener and the `if config:` guard are missing from this chunk.
1243 def _get_ytplayer_config(self, video_id, webpage):
1245 # User data may contain arbitrary character sequences that may affect
1246 # JSON extraction with regex, e.g. when '};' is contained the second
1247 # regex won't capture the whole JSON
. Yet working around by trying more
1248 # concrete regex first keeping in mind proper quoted string handling
1249 # to be implemented in future that will replace this workaround (see
1250 # https://github.com/rg3/youtube-dl/issues/7468,
1251 # https://github.com/rg3/youtube-dl/pull/7599)
1252 r
';ytplayer\.config\s*=\s*({.+?});ytplayer',
1253 r
';ytplayer\.config\s*=\s*({.+?});',
1255 config
= self
._search
_regex
(
1256 patterns
, webpage
, 'ytplayer.config', default
=None)
# uppercase_escape decodes \UXXXX-style escapes before JSON parsing.
1258 return self
._parse
_json
(
1259 uppercase_escape(config
), video_id
, fatal
=False)
# Discover automatically-generated captions through three historical API
# shapes: the legacy ttsurl flow, the newer player_response JSON, and the
# caption_tracks/caption_translation_languages args. NOTE(review): many
# original lines (try openers, dict literal bodies, returns) are missing from
# this chunk; visible tokens are preserved verbatim.
1261 def _get_automatic_captions(self
, video_id
, webpage
):
1262 """We need the webpage for getting the captions url, pass it as an
1263 argument to speed up the process."""
1264 self
.to_screen('%s: Looking for automatic captions' % video_id
)
1265 player_config
= self
._get
_ytplayer
_config
(video_id
, webpage
)
1266 err_msg
= 'Couldn\'t find automatic captions for %s' % video_id
1267 if not player_config
:
1268 self
._downloader
.report_warning(err_msg
)
1271 args
= player_config
['args']
1272 caption_url
= args
.get('ttsurl')
1274 timestamp
= args
['timestamp']
1275 # We get the available subtitles
1276 list_params
= compat_urllib_parse_urlencode({
1281 list_url
= caption_url
+ '&' + list_params
1282 caption_list
= self
._download
_xml
(list_url
, video_id
)
1283 original_lang_node
= caption_list
.find('track')
1284 if original_lang_node
is None:
1285 self
._downloader
.report_warning('Video doesn\'t have automatic captions')
1287 original_lang
= original_lang_node
.attrib
['lang_code']
1288 caption_kind
= original_lang_node
.attrib
.get('kind', '')
# Build one entry per translation target language.
1291 for lang_node
in caption_list
.findall('target'):
1292 sub_lang
= lang_node
.attrib
['lang_code']
1294 for ext
in self
._SUBTITLE
_FORMATS
:
1295 params
= compat_urllib_parse_urlencode({
1296 'lang': original_lang
,
1300 'kind': caption_kind
,
1302 sub_formats
.append({
1303 'url': caption_url
+ '&' + params
,
1306 sub_lang_list
[sub_lang
] = sub_formats
1307 return sub_lang_list
# Shared helper: expand a base caption URL into per-language format lists.
1309 def make_captions(sub_url
, sub_langs
):
1310 parsed_sub_url
= compat_urllib_parse_urlparse(sub_url
)
1311 caption_qs
= compat_parse_qs(parsed_sub_url
.query
)
1313 for sub_lang
in sub_langs
:
1315 for ext
in self
._SUBTITLE
_FORMATS
:
1317 'tlang': [sub_lang
],
1320 sub_url
= compat_urlparse
.urlunparse(parsed_sub_url
._replace
(
1321 query
=compat_urllib_parse_urlencode(caption_qs
, True)))
1322 sub_formats
.append({
1326 captions
[sub_lang
] = sub_formats
1329 # New captions format as of 22.06.2017
1330 player_response
= args
.get('player_response')
1331 if player_response
and isinstance(player_response
, compat_str
):
1332 player_response
= self
._parse
_json
(
1333 player_response
, video_id
, fatal
=False)
1335 renderer
= player_response
['captions']['playerCaptionsTracklistRenderer']
1336 base_url
= renderer
['captionTracks'][0]['baseUrl']
1338 for lang
in renderer
['translationLanguages']:
1339 lang_code
= lang
.get('languageCode')
1341 sub_lang_list
.append(lang_code
)
1342 return make_captions(base_url
, sub_lang_list
)
1344 # Some videos don't provide ttsurl but rather caption_tracks and
1345 # caption_translation_languages (e.g. 20LmZk1hakA)
1346 # Not used anymore as of 22.06.2017
1347 caption_tracks
= args
['caption_tracks']
1348 caption_translation_languages
= args
['caption_translation_languages']
1349 caption_url
= compat_parse_qs(caption_tracks
.split(',')[0])['u'][0]
1351 for lang
in caption_translation_languages
.split(','):
1352 lang_qs
= compat_parse_qs(compat_urllib_parse_unquote_plus(lang
))
1353 sub_lang
= lang_qs
.get('lc', [None])[0]
1355 sub_lang_list
.append(sub_lang
)
1356 return make_captions(caption_url
, sub_lang_list
)
1357 # An extractor error can be raised by the download process if there are
1358 # no automatic captions but there are subtitles
1359 except (KeyError, IndexError, ExtractorError
):
1360 self
._downloader
.report_warning(err_msg
)
# Ping YouTube's playback-stats URL so the video is marked as watched on the
# user's account. NOTE(review): the early-return line and the qs.update(...)
# lines are missing from this chunk; visible tokens are preserved verbatim.
1363 def _mark_watched(self
, video_id
, video_info
):
1364 playback_url
= video_info
.get('videostats_playback_base_url', [None])[0]
1365 if not playback_url
:
1367 parsed_playback_url
= compat_urlparse
.urlparse(playback_url
)
1368 qs
= compat_urlparse
.parse_qs(parsed_playback_url
.query
)
1370 # cpn generation algorithm is reverse engineered from base.js.
1371 # In fact it works even with dummy cpn.
1372 CPN_ALPHABET
= 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
# 16 pseudo-random base64url characters; & 63 keeps the index in range.
1373 cpn
= ''.join((CPN_ALPHABET
[random
.randint(0, 256) & 63] for _
in range(0, 16)))
1379 playback_url
= compat_urlparse
.urlunparse(
1380 parsed_playback_url
._replace
(query
=compat_urllib_parse_urlencode(qs
, True)))
# Best-effort request: failure to mark watched is never fatal.
1382 self
._download
_webpage
(
1383 playback_url
, video_id
, 'Marking watched',
1384 'Unable to mark watched', fatal
=False)
# Static helper: collect all embedded YouTube URLs found in an arbitrary
# webpage (iframe/object embeds, lazyYT embeds, Wordpress importer embeds).
# NOTE(review): the @staticmethod decorator, the `entries` initialization and
# the final `return entries` are missing from this chunk; visible tokens are
# preserved verbatim.
1387 def _extract_urls(webpage
):
1388 # Embedded YouTube player
1390 unescapeHTML(mobj
.group('url'))
1391 for mobj
in re
.finditer(r
'''(?x)
1401 (?P
<url
>(?
:https?
:)?
//(?
:www\
.)?
youtube(?
:-nocookie
)?\
.com
/
1402 (?
:embed|v|p
)/[0-9A
-Za
-z_
-]{11}
.*?
)
1405 # lazyYT YouTube embed
1406 entries.extend(list(map(
1408 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1410 # Wordpress "YouTube Video Importer" plugin
1411 matches = re.findall(r'''(?x
)<div
[^
>]+
1412 class=(?P
<q1
>[\'"])[^\'"]*\byvii
_single
_video
_player
\b[^
\'"]*(?P=q1)[^>]+
1413 data-video_id=(?P<q2>[\'"])([^
\'"]+)(?P=q2)''', webpage)
1414 entries.extend(m[-1] for m in matches)
def _extract_url(webpage):
    """Return the first embedded YouTube URL found in *webpage*, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if found:
        return found[0]
    return None
# Pull the video id out of a YouTube URL using cls._VALID_URL. NOTE(review):
# the decorator and the `if mobj is None:` guard plus the final return are
# missing from this chunk; the raise below only fires on a non-matching URL.
1424 def extract_id(cls, url):
1425 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1427 raise ExtractorError('Invalid URL: %s' % url)
1428 video_id = mobj.group(2)
1431 def _extract_annotations(self, video_id):
1432 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1433 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
# Static helper: parse chapter markers (timestamp links using the player's
# seekTo onclick handler) out of the video description and return a list of
# {start_time, end_time, title} dicts. NOTE(review): several original lines
# (guards' bodies, the chapters list initialization and return) are missing
# from this chunk; visible tokens are preserved verbatim.
1436 def _extract_chapters(description, duration):
1439 chapter_lines = re.findall(
1440 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\
.www\
.watch\
.player\
.seekTo
[^
>]+>(\d
{1,2}:\d
{1,2}(?
::\d
{1,2})?
)</a
>[^
>]*)(?
=$|
<br\s
*/>)',
1442 if not chapter_lines:
# Walk each marker; next_num indexes the following marker for end_time.
1445 for next_num, (chapter_line, time_point) in enumerate(
1446 chapter_lines, start=1):
1447 start_time = parse_duration(time_point)
1448 if start_time is None:
1450 if start_time > duration:
# The last chapter ends at the video duration; others end where the next
# chapter starts.
1452 end_time = (duration if next_num == len(chapter_lines)
1453 else parse_duration(chapter_lines[next_num][1]))
1454 if end_time is None:
1456 if end_time > duration:
1458 if start_time > end_time:
# Strip the anchor markup and collapse whitespace to get the chapter title.
1460 chapter_title = re.sub(
1461 r'<a
[^
>]+>[^
<]+</a
>', '', chapter_line).strip(' \t-')
1462 chapter_title = re.sub(r'\s
+', ' ', chapter_title)
1464 'start_time
': start_time,
1465 'end_time
': end_time,
1466 'title
': chapter_title,
1470 def _real_extract(self, url):
1471 url, smuggled_data = unsmuggle_url(url, {})
1474 'http
' if self._downloader.params.get('prefer_insecure
', False)
1479 parsed_url = compat_urllib_parse_urlparse(url)
1480 for component in [parsed_url.fragment, parsed_url.query]:
1481 query = compat_parse_qs(component)
1482 if start_time is None and 't
' in query:
1483 start_time = parse_duration(query['t
'][0])
1484 if start_time is None and 'start
' in query:
1485 start_time = parse_duration(query['start
'][0])
1486 if end_time is None and 'end
' in query:
1487 end_time = parse_duration(query['end
'][0])
1489 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1490 mobj = re.search(self._NEXT_URL_RE, url)
1492 url = proto + '://www
.youtube
.com
/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1493 video_id = self.extract_id(url)
1496 url = proto + '://www
.youtube
.com
/watch?v
=%s&gl
=US
&hl
=en
&has_verified
=1&bpctr
=9999999999' % video_id
1497 video_webpage = self._download_webpage(url, video_id)
1499 # Attempt to extract SWF player URL
1500 mobj = re.search(r'swfConfig
.*?
"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1501 if mobj is not None:
1502 player_url = re.sub(r'\\(.)', r'\
1', mobj.group(1))
1508 def add_dash_mpd(video_info):
1509 dash_mpd = video_info.get('dashmpd
')
1510 if dash_mpd and dash_mpd[0] not in dash_mpds:
1511 dash_mpds.append(dash_mpd[0])
1516 def extract_view_count(v_info):
1517 return int_or_none(try_get(v_info, lambda x: x['view_count
'][0]))
1520 embed_webpage = None
1521 if re.search(r'player
-age
-gate
-content
">', video_webpage) is not None:
1523 # We simulate the access to the video from www.youtube.com/v/{video_id}
1524 # this can be viewed without login into Youtube
1525 url = proto + '://www.youtube.com/embed/%s' % video_id
1526 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1527 data = compat_urllib_parse_urlencode({
1528 'video_id': video_id,
1529 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1530 'sts': self._search_regex(
1531 r'"sts
"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1533 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1534 video_info_webpage = self._download_webpage(
1535 video_info_url, video_id,
1536 note='Refetching age-gated info webpage',
1537 errnote='unable to download video info webpage')
1538 video_info = compat_parse_qs(video_info_webpage)
1539 add_dash_mpd(video_info)
1544 # Try looking directly into the video webpage
1545 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1547 args = ytplayer_config['args']
1548 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1549 # Convert to the same format returned by compat_parse_qs
1550 video_info = dict((k, [v]) for k, v in args.items())
1551 add_dash_mpd(video_info)
1552 # Rental video is not rented but preview is available (e.g.
1553 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1554 # https://github.com/rg3/youtube-dl/issues/10532)
1555 if not video_info and args.get('ypc_vid'):
1556 return self.url_result(
1557 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1558 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1560 sts = ytplayer_config.get('sts')
1561 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1562 # We also try looking in get_video_info since it may contain different dashmpd
1563 # URL that points to a DASH manifest with possibly different itag set (some itags
1564 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1565 # manifest pointed by get_video_info's dashmpd).
1566 # The general idea is to take a union of itags of both DASH manifests (for example
1567 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1568 self.report_video_info_webpage_download(video_id)
1569 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1571 'video_id': video_id,
1581 video_info_webpage = self._download_webpage(
1582 '%s://www.youtube.com/get_video_info' % proto,
1583 video_id, note=False,
1584 errnote='unable to download video info webpage',
1585 fatal=False, query=query)
1586 if not video_info_webpage:
1588 get_video_info = compat_parse_qs(video_info_webpage)
1589 add_dash_mpd(get_video_info)
1590 if view_count is None:
1591 view_count = extract_view_count(get_video_info)
1593 video_info = get_video_info
1594 if 'token' in get_video_info:
1595 # Different get_video_info requests may report different results, e.g.
1596 # some may report video unavailability, but some may serve it without
1597 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1598 # the original webpage as well as el=info and el=embedded get_video_info
1599 # requests report video unavailability due to geo restriction while
1600 # el=detailpage succeeds and returns valid data). This is probably
1601 # due to YouTube measures against IP ranges of hosting providers.
1602 # Working around by preferring the first succeeded video_info containing
1603 # the token if no such video_info yet was found.
1604 if 'token' not in video_info:
1605 video_info = get_video_info
1608 def extract_unavailable_message():
1609 return self._html_search_regex(
1610 r'(?s)<h1[^>]+id="unavailable
-message
"[^>]*>(.+?)</h1>',
1611 video_webpage, 'unavailable message', default=None)
1613 if 'token' not in video_info:
1614 if 'reason' in video_info:
1615 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1616 regions_allowed = self._html_search_meta(
1617 'regionsAllowed', video_webpage, default=None)
1618 countries = regions_allowed.split(',') if regions_allowed else None
1619 self.raise_geo_restricted(
1620 msg=video_info['reason'][0], countries=countries)
1621 reason = video_info['reason'][0]
1622 if 'Invalid parameters' in reason:
1623 unavailable_message = extract_unavailable_message()
1624 if unavailable_message:
1625 reason = unavailable_message
1626 raise ExtractorError(
1627 'YouTube said: %s' % reason,
1628 expected=True, video_id=video_id)
1630 raise ExtractorError(
1631 '"token
" parameter not in video info for unknown reason',
1635 if 'title' in video_info:
1636 video_title = video_info['title'][0]
1638 self._downloader.report_warning('Unable to extract video title')
1642 description_original = video_description = get_element_by_id("eow
-description
", video_webpage)
1643 if video_description:
1646 redir_url = compat_urlparse.urljoin(url, m.group(1))
1647 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1648 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1649 qs = compat_parse_qs(parsed_redir_url.query)
1655 description_original = video_description = re.sub(r'''(?x)
1657 (?:[a-zA-Z-]+="[^
"]*"\s
+)*?
1658 (?
:title|href
)="([^"]+)"\s+
1659 (?:[a-zA-Z-]+="[^
"]*"\s
+)*?
1663 ''', replace_url, video_description)
1664 video_description = clean_html(video_description)
1666 fd_mobj = re.search(r'<meta name="description
" content="([^
"]+)"', video_webpage)
1668 video_description = unescapeHTML(fd_mobj.group(1))
1670 video_description = ''
1672 if 'multifeed_metadata_list
' in video_info and not smuggled_data.get('force_singlefeed
', False):
1673 if not self._downloader.params.get('noplaylist
'):
1676 multifeed_metadata_list = video_info['multifeed_metadata_list
'][0]
1677 for feed in multifeed_metadata_list.split(','):
1678 # Unquote should take place before split on comma (,) since textual
1679 # fields may contain comma as well (see
1680 # https://github.com/rg3/youtube-dl/issues/8536)
1681 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1683 '_type
': 'url_transparent
',
1684 'ie_key
': 'Youtube
',
1686 '%s://www
.youtube
.com
/watch?v
=%s' % (proto, feed_data['id'][0]),
1687 {'force_singlefeed
': True}),
1688 'title
': '%s (%s)' % (video_title, feed_data['title
'][0]),
1690 feed_ids.append(feed_data['id'][0])
1692 'Downloading multifeed
video (%s) - add
--no
-playlist to just download video
%s'
1693 % (', '.join(feed_ids), video_id))
1694 return self.playlist_result(entries, video_id, video_title, video_description)
1695 self.to_screen('Downloading just video
%s because of
--no
-playlist
' % video_id)
1697 if view_count is None:
1698 view_count = extract_view_count(video_info)
1700 # Check for "rental" videos
1701 if 'ypc_video_rental_bar_text
' in video_info and 'author
' not in video_info:
1702 raise ExtractorError('"rental" videos
not supported
. See https
://github
.com
/rg3
/youtube
-dl
/issues
/359 for more information
.', expected=True)
1704 def _extract_filesize(media_url):
1705 return int_or_none(self._search_regex(
1706 r'\bclen
[=/](\d
+)', media_url, 'filesize
', default=None))
1708 if 'conn
' in video_info and video_info['conn
'][0].startswith('rtmp
'):
1709 self.report_rtmp_download()
1711 'format_id
': '_rtmp
',
1713 'url
': video_info['conn
'][0],
1714 'player_url
': player_url,
1716 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map
', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts
', [''])[0]) >= 1):
1717 encoded_url_map = video_info.get('url_encoded_fmt_stream_map
', [''])[0] + ',' + video_info.get('adaptive_fmts
', [''])[0]
1718 if 'rtmpe
%3Dyes
' in encoded_url_map:
1719 raise ExtractorError('rtmpe downloads are
not supported
, see https
://github
.com
/rg3
/youtube
-dl
/issues
/343 for more information
.', expected=True)
1721 fmt_list = video_info.get('fmt_list
', [''])[0]
1723 for fmt in fmt_list.split(','):
1724 spec = fmt.split('/')
1726 width_height = spec[1].split('x
')
1727 if len(width_height) == 2:
1728 formats_spec[spec[0]] = {
1729 'resolution
': spec[1],
1730 'width
': int_or_none(width_height[0]),
1731 'height
': int_or_none(width_height[1]),
1733 q = qualities(['small
', 'medium
', 'hd720
'])
1735 for url_data_str in encoded_url_map.split(','):
1736 url_data = compat_parse_qs(url_data_str)
1737 if 'itag
' not in url_data or 'url
' not in url_data:
1739 format_id = url_data['itag
'][0]
1740 url = url_data['url
'][0]
1742 if 's
' in url_data or self._downloader.params.get('youtube_include_dash_manifest
', True):
1743 ASSETS_RE = r'"assets":.+?
"js":\s
*("[^"]+")'
1744 jsplayer_url_json = self._search_regex(
1746 embed_webpage if age_gate else video_webpage,
1747 'JS player URL (1)', default=None)
1748 if not jsplayer_url_json and not age_gate:
1749 # We need the embed website after all
1750 if embed_webpage is None:
1751 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1752 embed_webpage = self._download_webpage(
1753 embed_url, video_id, 'Downloading embed webpage')
1754 jsplayer_url_json = self._search_regex(
1755 ASSETS_RE, embed_webpage, 'JS player URL')
1757 player_url = json.loads(jsplayer_url_json)
1758 if player_url is None:
1759 player_url_json = self._search_regex(
1760 r'ytplayer\.config.*?"url
"\s*:\s*("[^
"]+")',
1761 video_webpage, 'age gate player URL
')
1762 player_url = json.loads(player_url_json)
1764 if 'sig
' in url_data:
1765 url += '&signature
=' + url_data['sig
'][0]
1766 elif 's
' in url_data:
1767 encrypted_sig = url_data['s
'][0]
1769 if self._downloader.params.get('verbose
'):
1770 if player_url is None:
1771 player_version = 'unknown
'
1772 player_desc = 'unknown
'
1774 if player_url.endswith('swf
'):
1775 player_version = self._search_regex(
1776 r'-(.+?
)(?
:/watch_as3
)?\
.swf$
', player_url,
1777 'flash player
', fatal=False)
1778 player_desc = 'flash player
%s' % player_version
1780 player_version = self._search_regex(
1781 [r'html5player
-([^
/]+?
)(?
:/html5player(?
:-new
)?
)?\
.js
',
1782 r'(?
:www|player
)-([^
/]+)(?
:/[a
-z
]{2}_
[A
-Z
]{2}
)?
/base\
.js
'],
1784 'html5 player
', fatal=False)
1785 player_desc = 'html5 player
%s' % player_version
1787 parts_sizes = self._signature_cache_id(encrypted_sig)
1788 self.to_screen('{%s} signature length
%s, %s' %
1789 (format_id, parts_sizes, player_desc))
1791 signature = self._decrypt_signature(
1792 encrypted_sig, video_id, player_url, age_gate)
1793 url += '&signature
=' + signature
1794 if 'ratebypass
' not in url:
1795 url += '&ratebypass
=yes
'
1798 'format_id
': format_id,
1800 'player_url
': player_url,
1802 if format_id in self._formats:
1803 dct.update(self._formats[format_id])
1804 if format_id in formats_spec:
1805 dct.update(formats_spec[format_id])
1807 # Some itags are not included in DASH manifest thus corresponding formats will
1808 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1809 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1810 mobj = re.search(r'^
(?P
<width
>\d
+)[xX
](?P
<height
>\d
+)$
', url_data.get('size
', [''])[0])
1811 width, height = (int(mobj.group('width
')), int(mobj.group('height
'))) if mobj else (None, None)
1813 filesize = int_or_none(url_data.get(
1814 'clen
', [None])[0]) or _extract_filesize(url)
1816 quality = url_data.get('quality_label
', [None])[0] or url_data.get('quality
', [None])[0]
1819 'filesize
': filesize,
1820 'tbr
': float_or_none(url_data.get('bitrate
', [None])[0], 1000),
1823 'fps
': int_or_none(url_data.get('fps
', [None])[0]),
1824 'format_note
': quality,
1825 'quality
': q(quality),
1827 for key, value in more_fields.items():
1830 type_ = url_data.get('type', [None])[0]
1832 type_split = type_.split(';')
1833 kind_ext = type_split[0].split('/')
1834 if len(kind_ext) == 2:
1836 dct['ext
'] = mimetype2ext(type_split[0])
1837 if kind in ('audio
', 'video
'):
1839 for mobj in re.finditer(
1840 r'(?P
<key
>[a
-zA
-Z_
-]+)=(?P
<quote
>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1841 if mobj.group('key') == 'codecs':
1842 codecs = mobj.group('val')
1845 dct.update(parse_codecs(codecs))
1846 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1847 dct['downloader_options'] = {
1848 # Youtube throttles chunks >~10M
1849 'http_chunk_size': 10485760,
1852 elif video_info.get('hlsvp'):
1853 manifest_url = video_info['hlsvp'][0]
1855 m3u8_formats = self._extract_m3u8_formats(
1856 manifest_url, video_id, 'mp4', fatal=False)
1857 for a_format in m3u8_formats:
1858 itag = self._search_regex(
1859 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1861 a_format['format_id'] = itag
1862 if itag in self._formats:
1863 dct = self._formats[itag].copy()
1864 dct.update(a_format)
1866 a_format['player_url'] = player_url
1867 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1868 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1869 formats.append(a_format)
1871 error_message = clean_html(video_info.get('reason', [None])[0])
1872 if not error_message:
1873 error_message = extract_unavailable_message()
1875 raise ExtractorError(error_message, expected=True)
1876 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1879 video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1881 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1883 self._downloader.report_warning('unable to extract uploader name')
1886 video_uploader_id = None
1887 video_uploader_url = None
1889 r'<link itemprop="url
" href="(?P
<uploader_url
>https?
://www\
.youtube\
.com
/(?
:user|channel
)/(?P
<uploader_id
>[^
"]+))">',
1891 if mobj is not None:
1892 video_uploader_id = mobj.group('uploader_id
')
1893 video_uploader_url = mobj.group('uploader_url
')
1895 self._downloader.report_warning('unable to extract uploader nickname
')
1898 # We try first to get a high quality image:
1899 m_thumb = re.search(r'<span itemprop
="thumbnail".*?href
="(.*?)">',
1900 video_webpage, re.DOTALL)
1901 if m_thumb is not None:
1902 video_thumbnail = m_thumb.group(1)
1903 elif 'thumbnail_url
' not in video_info:
1904 self._downloader.report_warning('unable to extract video thumbnail
')
1905 video_thumbnail = None
1906 else: # don't panic
if we can
't find it
1907 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url
'][0])
1910 upload_date = self._html_search_meta(
1911 'datePublished
', video_webpage, 'upload date
', default=None)
1913 upload_date = self._search_regex(
1914 [r'(?s
)id="eow-date.*?>(.*?)</span>',
1915 r'(?:id="watch
-uploader
-info
".*?>.*?|["\']simpleText
["\']\s*:\s*["\'])(?
:Published|Uploaded|Streamed live|Started
) on (.+?
)[<"\']'],
1916 video_webpage, 'upload date', default=None)
1917 upload_date = unified_strdate(upload_date)
1919 video_license = self._html_search_regex(
1920 r'<h4[^>]+class="title
"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1921 video_webpage, 'license', default=None)
1923 m_music = re.search(
1925 <h4[^>]+class="title
"[^>]*>\s*Music\s*</h4>\s*
1933 \bhref=["\']/red
[^
>]*>|
# drop possible
1934 >\s
*Listen ad
-free
with YouTube Red
# YouTube Red ad
1941 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1942 video_creator = clean_html(m_music.group('creator'))
1944 video_alt_title = video_creator = None
1946 def extract_meta(field):
1947 return self._html_search_regex(
1948 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1949 video_webpage, field, default=None)
1951 track = extract_meta('Song')
1952 artist = extract_meta('Artist')
1954 m_episode = re.search(
1955 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*ā¢\s*E(?P<episode>\d+)</span>',
1958 series = m_episode.group('series')
1959 season_number = int(m_episode.group('season'))
1960 episode_number = int(m_episode.group('episode'))
1962 series = season_number = episode_number = None
1964 m_cat_container = self._search_regex(
1965 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1966 video_webpage, 'categories', default=None)
1968 category = self._html_search_regex(
1969 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1971 video_categories = None if category is None else [category]
1973 video_categories = None
1976 unescapeHTML(m.group('content'))
1977 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1979 def _extract_count(count_name):
1980 return str_to_int(self._search_regex(
1981 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1982 % re.escape(count_name),
1983 video_webpage, count_name, default=None))
1985 like_count = _extract_count('like')
1986 dislike_count = _extract_count('dislike')
1989 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1990 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1992 video_duration = try_get(
1993 video_info, lambda x: int_or_none(x['length_seconds'][0]))
1994 if not video_duration:
1995 video_duration = parse_duration(self._html_search_meta(
1996 'duration', video_webpage, 'video duration'))
1999 video_annotations = None
2000 if self._downloader.params.get('writeannotations', False):
2001 video_annotations = self._extract_annotations(video_id)
2003 chapters = self._extract_chapters(description_original, video_duration)
2005 # Look for the DASH manifest
2006 if self._downloader.params.get('youtube_include_dash_manifest', True):
2007 dash_mpd_fatal = True
2008 for mpd_url in dash_mpds:
2011 def decrypt_sig(mobj):
2013 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2014 return '/signature/%s' % dec_s
2016 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2018 for df in self._extract_mpd_formats(
2019 mpd_url, video_id, fatal=dash_mpd_fatal,
2020 formats_dict=self._formats):
2021 if not df.get('filesize'):
2022 df['filesize'] = _extract_filesize(df['url'])
2023 # Do not overwrite DASH format found in some previous DASH manifest
2024 if df['format_id'] not in dash_formats:
2025 dash_formats[df['format_id']] = df
2026 # Additional DASH manifests may end up in HTTP Error 403 therefore
2027 # allow them to fail without bug report message if we already have
2028 # some DASH manifest succeeded. This is temporary workaround to reduce
2029 # burst of bug reports until we figure out the reason and whether it
2030 # can be fixed at all.
2031 dash_mpd_fatal = False
2032 except (ExtractorError, KeyError) as e:
2033 self.report_warning(
2034 'Skipping DASH manifest: %r' % e, video_id)
2036 # Remove the formats we found through non-DASH, they
2037 # contain less info and it can be wrong, because we use
2038 # fixed values (for example the resolution). See
2039 # https://github.com/rg3/youtube-dl/issues/5774 for an
2041 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2042 formats.extend(dash_formats.values())
2044 # Check for malformed aspect ratio
2045 stretched_m = re.search(
2046 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2049 w = float(stretched_m.group('w'))
2050 h = float(stretched_m.group('h'))
2051 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2052 # We will only process correct ratios.
2056 if f.get('vcodec') != 'none':
2057 f['stretched_ratio'] = ratio
2059 self._sort_formats(formats)
2061 self.mark_watched(video_id, video_info)
2065 'uploader': video_uploader,
2066 'uploader_id': video_uploader_id,
2067 'uploader_url': video_uploader_url,
2068 'upload_date': upload_date,
2069 'license': video_license,
2070 'creator': video_creator or artist,
2071 'title': video_title,
2072 'alt_title': video_alt_title or track,
2073 'thumbnail': video_thumbnail,
2074 'description': video_description,
2075 'categories': video_categories,
2077 'subtitles': video_subtitles,
2078 'automatic_captions': automatic_captions,
2079 'duration': video_duration,
2080 'age_limit': 18 if age_gate else 0,
2081 'annotations': video_annotations,
2082 'chapters': chapters,
2083 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2084 'view_count': view_count,
2085 'like_count': like_count,
2086 'dislike_count': dislike_count,
2087 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
2090 'start_time': start_time,
2091 'end_time': end_time,
2093 'season_number': season_number,
2094 'episode_number': episode_number,
2100 class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2101 IE_DESC = 'YouTube.com playlists'
2102 _VALID_URL = r"""(?x)(?:
2108 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2109 \? (?:.*?[&;])*? (?:p|a|list)=
2112 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2115 (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
2116 # Top tracks, they can also include dots
2122 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2123 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2124 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
2125 IE_NAME = 'youtube:playlist'
2127 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2129 'title': 'ytdl test PL',
2130 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2132 'playlist_count': 3,
2134 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2136 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2137 'title': 'YDL_Empty_List',
2139 'playlist_count': 0,
2140 'skip': 'This playlist is private',
2142 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2143 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2145 'title': '29C3: Not my department',
2146 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2148 'playlist_count': 95,
2150 'note': 'issue #673',
2151 'url': 'PLBB231211A4F62143',
2153 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2154 'id': 'PLBB231211A4F62143',
2156 'playlist_mincount': 26,
2158 'note': 'Large playlist',
2159 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2161 'title': 'Uploads from Cauchemar',
2162 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2164 'playlist_mincount': 799,
2166 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2168 'title': 'YDL_safe_search',
2169 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2171 'playlist_count': 2,
2172 'skip': 'This playlist is private',
2175 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2176 'playlist_count': 4,
2179 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2182 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2183 'playlist_mincount': 485,
2185 'title': '2017 čÆčŖęę°å®ę² (2/24ę“ę°)',
2186 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2189 'note': 'Embedded SWF player',
2190 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2191 'playlist_count': 4,
2194 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2197 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2198 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2200 'title': 'Uploads from Interstellar Movie',
2201 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2203 'playlist_mincount': 21,
2205 # Playlist URL that does not actually serve a playlist
2206 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2208 'id': 'FqZTN594JQw',
2210 'title': "Smiley's People 01 detective, Adventure Series, Action",
2211 'uploader': 'STREEM',
2212 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2213 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2214 'upload_date': '20150526',
2215 'license': 'Standard YouTube License',
2216 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2217 'categories': ['People & Blogs'],
2220 'dislike_count': int,
2223 'skip_download': True,
2225 'add_ie': [YoutubeIE.ie_key()],
2227 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2229 'id': 'yeWKywCrFtk',
2231 'title': 'Small Scale Baler and Braiding Rugs',
2232 'uploader': 'Backus-Page House Museum',
2233 'uploader_id': 'backuspagemuseum',
2234 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2235 'upload_date': '20161008',
2236 'license': 'Standard YouTube License',
2237 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2238 'categories': ['Nonprofits & Activism'],
2241 'dislike_count': int,
2245 'skip_download': True,
2248 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2249 'only_matching': True,
2251 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2252 'only_matching': True,
2255 def _real_initialize(self):
2258 def _extract_mix(self, playlist_id):
2259 # The mixes are generated from a single video
2260 # the id of the playlist is just 'RD' + video_id
2262 last_id = playlist_id[-11:]
2263 for n in itertools.count(1):
2264 url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
2265 webpage = self._download_webpage(
2266 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
2267 new_ids = orderedSet(re.findall(
2268 r'''(?xs
)data
-video
-username
=".*?".*?
2269 href
="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?
list=%s''' % re.escape(playlist_id),
2271 # Fetch new pages until all the videos are repeated, it seems that
2272 # there are always 51 unique videos.
2273 new_ids = [_id for _id in new_ids if _id not in ids]
2279 url_results = self._ids_to_results(ids)
2281 search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
2283 search_title('playlist-title') or
2284 search_title('title long-title') or
2285 search_title('title'))
2286 title = clean_html(title_span)
2288 return self.playlist_result(url_results, playlist_id, title)
2290 def _extract_playlist(self, playlist_id):
2291 url = self._TEMPLATE_URL % playlist_id
2292 page = self._download_webpage(url, playlist_id)
2294 # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
2295 for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
2296 match = match.strip()
2297 # Check if the playlist exists or is private
2298 mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
2300 reason = mobj.group('reason')
2301 message = 'This playlist %s' % reason
2302 if 'private' in reason:
2303 message += ', use --username or --netrc to access it'
2305 raise ExtractorError(message, expected=True)
2306 elif re.match(r'[^<]*Invalid parameters[^<]*', match):
2307 raise ExtractorError(
2308 'Invalid parameters. Maybe URL is incorrect.',
2310 elif re.match(r'[^<]*Choose your language[^<]*', match):
2313 self.report_warning('Youtube gives an alert message: ' + match)
2315 playlist_title = self._html_search_regex(
2316 r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
2317 page, 'title', default=None)
2319 _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
2320 uploader = self._search_regex(
2321 r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
2322 page, 'uploader', default=None)
2324 r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
2327 uploader_id = mobj.group('uploader_id')
2328 uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
2330 uploader_id = uploader_url = None
2334 if not playlist_title:
2336 # Some playlist URLs don't actually serve a playlist (e.g.
2337 # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
2338 next(self._entries(page, playlist_id))
2339 except StopIteration:
2342 playlist = self.playlist_result(
2343 self._entries(page, playlist_id), playlist_id, playlist_title)
2345 'uploader': uploader,
2346 'uploader_id': uploader_id,
2347 'uploader_url': uploader_url,
2350 return has_videos, playlist
2352 def _check_download_just_video(self, url, playlist_id):
2353 # Check if it's a video-specific URL
2354 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2355 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2356 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2357 'video id', default=None)
2359 if self._downloader.params.get('noplaylist'):
2360 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2361 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2363 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2364 return video_id, None
2367 def _real_extract(self, url):
2368 # Extract playlist id
2369 mobj = re.match(self._VALID_URL, url)
2371 raise ExtractorError('Invalid URL: %s' % url)
2372 playlist_id = mobj.group(1) or mobj.group(2)
2374 video_id, video = self._check_download_just_video(url, playlist_id)
2378 if playlist_id.startswith(('RD', 'UL', 'PU')):
2379 # Mixes require a custom extraction process
2380 return self._extract_mix(playlist_id)
2382 has_videos, playlist = self._extract_playlist(playlist_id)
2383 if has_videos or not video_id:
2386 # Some playlist URLs don't actually serve a playlist (see
2387 # https://github.com/rg3/youtube-dl/issues/10537).
2388 # Fallback to plain video extraction if there is a video id
2389 # along with playlist id.
2390 return self.url_result(video_id, 'Youtube', video_id=video_id)
2393 class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2394 IE_DESC = 'YouTube.com channels'
2395 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
2396 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2397 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2398 IE_NAME = 'youtube:channel'
2400 'note': 'paginated channel',
2401 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2402 'playlist_mincount': 91,
2404 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2405 'title': 'Uploads from lex will',
2408 'note': 'Age restricted channel',
2409 # from https://www.youtube.com/user/DeusExOfficial
2410 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2411 'playlist_mincount': 64,
2413 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2414 'title': 'Uploads from Deus Ex',
2419 def suitable(cls, url):
2420 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2421 else super(YoutubeChannelIE, cls).suitable(url))
2423 def _build_template_url(self, url, channel_id):
2424 return self._TEMPLATE_URL % channel_id
2426 def _real_extract(self, url):
2427 channel_id = self._match_id(url)
2429 url = self._build_template_url(url, channel_id)
2431 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2432 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2433 # otherwise fallback on channel by page extraction
2434 channel_page = self._download_webpage(
2435 url + '?view=57', channel_id,
2436 'Downloading channel page', fatal=False)
2437 if channel_page is False:
2438 channel_playlist_id = False
2440 channel_playlist_id = self._html_search_meta(
2441 'channelId', channel_page, 'channel id', default=None)
2442 if not channel_playlist_id:
2443 channel_url = self._html_search_meta(
2444 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2445 channel_page, 'channel url', default=None)
2447 channel_playlist_id = self._search_regex(
2448 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2449 channel_url, 'channel id', default=None)
2450 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2451 playlist_id = 'UU' + channel_playlist_id[2:]
2452 return self.url_result(
2453 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
2455 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
2456 autogenerated = re.search(r'''(?x
)
2458 channel
-header
-autogenerated
-label|
2459 yt
-channel
-title
-autogenerated
2460 )[^
"]*"''', channel_page) is not None
2463 # The videos are contained in a single page
2464 # the ajax pages can't be used, they are empty
2467 video_id, 'Youtube', video_id=video_id,
2468 video_title=video_title)
2469 for video_id, video_title in self.extract_videos_from_page(channel_page)]
2470 return self.playlist_result(entries, channel_id)
2473 next(self._entries(channel_page, channel_id))
2474 except StopIteration:
2475 alert_message = self._html_search_regex(
2476 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2477 channel_page, 'alert', default=None, group='alert')
2479 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2481 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
2484 class YoutubeUserIE(YoutubeChannelIE):
2485 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
2486 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
2487 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
2488 IE_NAME = 'youtube:user'
2491 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2492 'playlist_mincount': 320,
2494 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2495 'title': 'Uploads from The Linux Foundation',
2498 # Only available via https://www.youtube.com/c/12minuteathlete/videos
2499 # but not https://www.youtube.com/user/12minuteathlete/videos
2500 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2501 'playlist_mincount': 249,
2503 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2504 'title': 'Uploads from 12 Minute Athlete',
2507 'url': 'ytuser:phihag',
2508 'only_matching': True,
2510 'url': 'https://www.youtube.com/c/gametrailers',
2511 'only_matching': True,
2513 'url': 'https://www.youtube.com/gametrailers',
2514 'only_matching': True,
2516 # This channel is not available, geo restricted to JP
2517 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2518 'only_matching': True,
2522 def suitable(cls, url):
2523 # Don't return True if the url can be extracted with other youtube
2524 # extractor, the regex would is too permissive and it would match.
2525 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2526 if any(ie.suitable(url) for ie in other_yt_ies):
2529 return super(YoutubeUserIE, cls).suitable(url)
2531 def _build_template_url(self, url, channel_id):
2532 mobj = re.match(self._VALID_URL, url)
2533 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2536 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2537 IE_DESC = 'YouTube.com live streams'
2538 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
2539 IE_NAME = 'youtube:live'
2542 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2544 'id': 'a48o2S1cPoo',
2546 'title': 'The Young Turks - Live Main Show',
2547 'uploader': 'The Young Turks',
2548 'uploader_id': 'TheYoungTurks',
2549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2550 'upload_date': '20150715',
2551 'license': 'Standard YouTube License',
2552 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2553 'categories': ['News & Politics'],
2554 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2556 'dislike_count': int,
2559 'skip_download': True,
2562 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2563 'only_matching': True,
2565 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2566 'only_matching': True,
2568 'url': 'https://www.youtube.com/TheYoungTurks/live',
2569 'only_matching': True,
2572 def _real_extract(self, url):
2573 mobj = re.match(self._VALID_URL, url)
2574 channel_id = mobj.group('id')
2575 base_url = mobj.group('base_url')
2576 webpage = self._download_webpage(url, channel_id, fatal=False)
2578 page_type = self._og_search_property(
2579 'type', webpage, 'page type', default='')
2580 video_id = self._html_search_meta(
2581 'videoId', webpage, 'video id', default=None)
2582 if page_type.startswith('video') and video_id and re.match(
2583 r'^[0-9A-Za-z_-]{11}$', video_id):
2584 return self.url_result(video_id, YoutubeIE.ie_key())
2585 return self.url_result(base_url)
2588 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
2589 IE_DESC = 'YouTube.com user/channel playlists'
2590 _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
2591 IE_NAME = 'youtube:playlists'
2594 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2595 'playlist_mincount': 4,
2597 'id': 'ThirstForScience',
2598 'title': 'Thirst for Science',
2601 # with "Load more" button
2602 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2603 'playlist_mincount': 70,
2606 'title': 'ŠŠ³Š¾ŃŃ ŠŠ»ŠµŠ¹Š½ŠµŃ',
2609 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
2610 'playlist_mincount': 17,
2612 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
2613 'title': 'Chem Player',
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for search extractors: the regex used to pull
    video ids (and optional titles) out of a results page."""
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Handle ``ytsearchN:query`` style searches."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Pages through the spf=navigate JSON endpoint, following the 'Next'
        link, until enough videos are collected or pages run out. The elided
        initialisation lines (``videos = []``, ``limit = n``, ``url_query =``)
        and loop ``break`` statements are restored.
        """
        videos = []
        limit = n

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            if not new_videos or len(videos) > limit:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        if len(videos) > n:
            # Trim the overshoot from the last page.
            videos = videos[:n]
        return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as :class:`YoutubeSearchIE` but sorted by upload date."""
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
2679 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
2680 IE_DESC = 'YouTube.com search URLs'
2681 IE_NAME = 'youtube:search_url'
2682 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
2684 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
2685 'playlist_mincount': 5,
2687 'title': 'youtube-dl test video',
2690 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
2691 'only_matching': True,
2694 def _real_extract(self, url):
2695 mobj = re.match(self._VALID_URL, url)
2696 query = compat_urllib_parse_unquote_plus(mobj.group('query'))
2697 webpage = self._download_webpage(url, query)
2698 return self.playlist_result(self._process_page(webpage), playlist_title=query)
2701 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
2702 IE_DESC = 'YouTube.com (multi-season) shows'
2703 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
2704 IE_NAME = 'youtube:show'
2706 'url': 'https://www.youtube.com/show/airdisasters',
2707 'playlist_mincount': 5,
2709 'id': 'airdisasters',
2710 'title': 'Air Disasters',
2714 def _real_extract(self, url):
2715 playlist_id = self._match_id(url)
2716 return super(YoutubeShowIE, self)._real_extract(
2717 'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.

    The ``@property`` / ``def`` lines for IE_NAME, the ``self._login()``
    body and several control-flow lines in ``_entries`` were elided in this
    copy and are restored.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so authentication is mandatory.
        self._login()

    def _entries(self, page):
        """Yield url_result entries, following 'load more' until no new ids."""
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
            if not new_ids:
                break

            ids.extend(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the authenticated user's 'Watch Later' (WL) playlist.

    The ``_TESTS`` list brackets and the ``if video:`` / ``return playlist``
    lines were elided in this copy and are restored.
    """
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # --no-playlist may resolve the URL to a single video first.
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        _, playlist = self._extract_playlist('WL')
        return playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the logged-in user's favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds a regular playlist id; scrape it out and
        # hand off to the playlist extractor.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's recommended-videos feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's subscriptions feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Extract the authenticated user's watch-history feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
2827 class YoutubeTruncatedURLIE(InfoExtractor):
2828 IE_NAME = 'youtube:truncated_url'
2829 IE_DESC = False # Do not list
2830 _VALID_URL = r'''(?x
)
2832 (?
:\w
+\
.)?
[yY
][oO
][uU
][tT
][uU
][bB
][eE
](?
:-nocookie
)?\
.com
/
2835 annotation_id
=annotation_
[^
&]+|
2841 attribution_link
\?a
=[^
&]+
2847 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
2848 'only_matching': True,
2850 'url': 'https://www.youtube.com/watch?',
2851 'only_matching': True,
2853 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
2854 'only_matching': True,
2856 'url': 'https://www.youtube.com/watch?feature=foo',
2857 'only_matching': True,
2859 'url': 'https://www.youtube.com/watch?hl=en-GB',
2860 'only_matching': True,
2862 'url': 'https://www.youtube.com/watch?t=2372',
2863 'only_matching': True,
2866 def _real_extract(self, url):
2867 raise ExtractorError(
2868 'Did you forget to quote the URL? Remember that & is a meta '
2869 'character in most shells, so you want to put the URL in quotes, '
2871 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
2872 ' or simply youtube-dl BaW_jenozKc .',
2876 class YoutubeTruncatedIDIE(InfoExtractor):
2877 IE_NAME = 'youtube:truncated_id'
2878 IE_DESC = False # Do not list
2879 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
2882 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
2883 'only_matching': True,
2886 def _real_extract(self, url):
2887 video_id = self._match_id(url)
2888 raise ExtractorError(
2889 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),