9 from .common
import InfoExtractor
, SearchInfoExtractor
15 compat_urllib_request
,
26 class YoutubeBaseInfoExtractor(InfoExtractor
):
27 """Provide base functions for Youtube extractors"""
28 _LOGIN_URL
= 'https://accounts.google.com/ServiceLogin'
29 _LANG_URL
= r
'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
30 _AGE_URL
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
31 _NETRC_MACHINE
= 'youtube'
32 # If True it will raise an error if no login info is provided
33 _LOGIN_REQUIRED
= False
35 def report_lang(self
):
36 """Report attempt to set language."""
37 self
.to_screen(u
'Setting language')
39 def _set_language(self
):
40 request
= compat_urllib_request
.Request(self
._LANG
_URL
)
43 compat_urllib_request
.urlopen(request
).read()
44 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
45 self
._downloader
.report_warning(u
'unable to set language: %s' % compat_str(err
))
50 (username
, password
) = self
._get
_login
_info
()
51 # No authentication to be performed
53 if self
._LOGIN
_REQUIRED
:
54 raise ExtractorError(u
'No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True)
57 request
= compat_urllib_request
.Request(self
._LOGIN
_URL
)
59 login_page
= compat_urllib_request
.urlopen(request
).read().decode('utf-8')
60 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
61 self
._downloader
.report_warning(u
'unable to fetch login page: %s' % compat_str(err
))
66 match
= re
.search(re
.compile(r
'<input.+?name="GALX".+?value="(.+?)"', re
.DOTALL
), login_page
)
69 match
= re
.search(re
.compile(r
'<input.+?name="dsh".+?value="(.+?)"', re
.DOTALL
), login_page
)
75 u
'continue': u
'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
79 u
'PersistentCookie': u
'yes',
81 u
'bgresponse': u
'js_disabled',
82 u
'checkConnection': u
'',
83 u
'checkedDomains': u
'youtube',
89 u
'signIn': u
'Sign in',
91 u
'service': u
'youtube',
95 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
97 login_form
= dict((k
.encode('utf-8'), v
.encode('utf-8')) for k
,v
in login_form_strs
.items())
98 login_data
= compat_urllib_parse
.urlencode(login_form
).encode('ascii')
99 request
= compat_urllib_request
.Request(self
._LOGIN
_URL
, login_data
)
102 login_results
= compat_urllib_request
.urlopen(request
).read().decode('utf-8')
103 if re
.search(r
'(?i)<form[^>]* id="gaia_loginform"', login_results
) is not None:
104 self
._downloader
.report_warning(u
'unable to log in: bad username or password')
106 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
107 self
._downloader
.report_warning(u
'unable to log in: %s' % compat_str(err
))
111 def _confirm_age(self
):
114 'action_confirm': 'Confirm',
116 request
= compat_urllib_request
.Request(self
._AGE
_URL
, compat_urllib_parse
.urlencode(age_form
))
118 self
.report_age_confirmation()
119 compat_urllib_request
.urlopen(request
).read().decode('utf-8')
120 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
121 raise ExtractorError(u
'Unable to confirm age: %s' % compat_str(err
))
124 def _real_initialize(self
):
125 if self
._downloader
is None:
127 if not self
._set
_language
():
129 if not self
._login
():
133 class YoutubeIE(YoutubeBaseInfoExtractor
):
134 IE_DESC
= u
'YouTube.com'
137 (?:https?://)? # http(s):// (optional)
138 (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
139 tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
140 (?:.*?\#/)? # handle anchor (#/) redirect urls
141 (?: # the various things that can precede the ID:
142 (?:(?:v|embed|e)/) # v/ or embed/ or e/
143 |(?: # or the v= param in all its forms
144 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
145 (?:\?|\#!?) # the params delimiter ? or # or #!
146 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
149 )? # optional -> youtube.com/xxxx is OK
150 )? # all until now is optional -> you can pass the naked ID
151 ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
152 (?(1).+)? # if we found the ID, everything can follow
154 _NEXT_URL_RE
= r
'[\?&]next_url=([^&]+)'
155 # Listed in order of quality
156 _available_formats
= ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
157 '95', '94', '93', '92', '132', '151',
159 '85', '84', '102', '83', '101', '82', '100',
161 '138', '137', '248', '136', '247', '135', '246',
162 '245', '244', '134', '243', '133', '242', '160',
164 '141', '172', '140', '171', '139',
166 _available_formats_prefer_free
= ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
167 '95', '94', '93', '92', '132', '151',
168 '85', '102', '84', '101', '83', '100', '82',
170 '138', '248', '137', '247', '136', '246', '245',
171 '244', '135', '243', '134', '242', '133', '160',
173 '172', '141', '171', '140', '139',
175 _video_extensions
= {
196 # videos that use m3u8
228 _video_dimensions
= {
309 u
"url": u
"http://www.youtube.com/watch?v=BaW_jenozKc",
310 u
"file": u
"BaW_jenozKc.mp4",
312 u
"title": u
"youtube-dl test video \"'/\\ä↭𝕐",
313 u
"uploader": u
"Philipp Hagemeister",
314 u
"uploader_id": u
"phihag",
315 u
"upload_date": u
"20121002",
316 u
"description": u
"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
320 u
"url": u
"http://www.youtube.com/watch?v=1ltcDfZMA3U",
321 u
"file": u
"1ltcDfZMA3U.flv",
322 u
"note": u
"Test VEVO video (#897)",
324 u
"upload_date": u
"20070518",
325 u
"title": u
"Maps - It Will Find You",
326 u
"description": u
"Music video by Maps performing It Will Find You.",
327 u
"uploader": u
"MuteUSA",
328 u
"uploader_id": u
"MuteUSA"
332 u
"url": u
"http://www.youtube.com/watch?v=UxxajLWwzqY",
333 u
"file": u
"UxxajLWwzqY.mp4",
334 u
"note": u
"Test generic use_cipher_signature video (#897)",
336 u
"upload_date": u
"20120506",
337 u
"title": u
"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
338 u
"description": u
"md5:3e2666e0a55044490499ea45fe9037b7",
339 u
"uploader": u
"Icona Pop",
340 u
"uploader_id": u
"IconaPop"
344 u
"url": u
"https://www.youtube.com/watch?v=07FYdnEawAQ",
345 u
"file": u
"07FYdnEawAQ.mp4",
346 u
"note": u
"Test VEVO video with age protection (#956)",
348 u
"upload_date": u
"20130703",
349 u
"title": u
"Justin Timberlake - Tunnel Vision (Explicit)",
350 u
"description": u
"md5:64249768eec3bc4276236606ea996373",
351 u
"uploader": u
"justintimberlakeVEVO",
352 u
"uploader_id": u
"justintimberlakeVEVO"
356 u
'url': u
'https://www.youtube.com/watch?v=TGi3HqYrWHE',
357 u
'file': u
'TGi3HqYrWHE.mp4',
358 u
'note': u
'm3u8 video',
360 u
'title': u
'Triathlon - Men - London 2012 Olympic Games',
361 u
'description': u
'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
362 u
'uploader': u
'olympic',
363 u
'upload_date': u
'20120807',
364 u
'uploader_id': u
'olympic',
367 u
'skip_download': True,
374 def suitable(cls
, url
):
375 """Receives a URL and returns True if suitable for this IE."""
376 if YoutubePlaylistIE
.suitable(url
) or YoutubeSubscriptionsIE
.suitable(url
): return False
377 return re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
) is not None
379 def report_video_webpage_download(self
, video_id
):
380 """Report attempt to download video webpage."""
381 self
.to_screen(u
'%s: Downloading video webpage' % video_id
)
383 def report_video_info_webpage_download(self
, video_id
):
384 """Report attempt to download video info webpage."""
385 self
.to_screen(u
'%s: Downloading video info webpage' % video_id
)
387 def report_video_subtitles_download(self
, video_id
):
388 """Report attempt to download video info webpage."""
389 self
.to_screen(u
'%s: Checking available subtitles' % video_id
)
391 def report_video_subtitles_request(self
, video_id
, sub_lang
, format
):
392 """Report attempt to download video info webpage."""
393 self
.to_screen(u
'%s: Downloading video subtitles for %s.%s' % (video_id
, sub_lang
, format
))
395 def report_video_subtitles_available(self
, video_id
, sub_lang_list
):
396 """Report available subtitles."""
397 sub_lang
= ",".join(list(sub_lang_list
.keys()))
398 self
.to_screen(u
'%s: Available subtitles for video: %s' % (video_id
, sub_lang
))
400 def report_information_extraction(self
, video_id
):
401 """Report attempt to extract video information."""
402 self
.to_screen(u
'%s: Extracting video information' % video_id
)
404 def report_unavailable_format(self
, video_id
, format
):
405 """Report extracted video URL."""
406 self
.to_screen(u
'%s: Format %s not available' % (video_id
, format
))
408 def report_rtmp_download(self
):
409 """Indicate the download will use the RTMP protocol."""
410 self
.to_screen(u
'RTMP download detected')
412 def _decrypt_signature(self
, s
):
413 """Turn the encrypted s field into a working signature"""
416 return s
[25] + s
[3:25] + s
[0] + s
[26:42] + s
[79] + s
[43:79] + s
[91] + s
[80:83]
418 return s
[25] + s
[3:25] + s
[2] + s
[26:40] + s
[77] + s
[41:77] + s
[89] + s
[78:81]
420 return s
[84:78:-1] + s
[87] + s
[77:60:-1] + s
[0] + s
[59:3:-1]
422 return s
[7:28] + s
[87] + s
[29:45] + s
[55] + s
[46:55] + s
[2] + s
[56:87] + s
[28]
424 return s
[6:27] + s
[4] + s
[28:39] + s
[27] + s
[40:59] + s
[2] + s
[60:]
426 return s
[83:36:-1] + s
[0] + s
[35:2:-1]
428 return s
[83:34:-1] + s
[0] + s
[33:27:-1] + s
[3] + s
[26:19:-1] + s
[34] + s
[18:3:-1] + s
[27]
430 return s
[81:36:-1] + s
[0] + s
[35:2:-1]
432 return s
[81:64:-1] + s
[82] + s
[63:52:-1] + s
[45] + s
[51:45:-1] + s
[1] + s
[44:1:-1] + s
[0]
434 return s
[1:19] + s
[0] + s
[20:68] + s
[19] + s
[69:82]
436 return s
[56] + s
[79:56:-1] + s
[41] + s
[55:41:-1] + s
[80] + s
[40:34:-1] + s
[0] + s
[33:29:-1] + s
[34] + s
[28:9:-1] + s
[29] + s
[8:0:-1] + s
[9]
438 return s
[1:19] + s
[0] + s
[20:68] + s
[19] + s
[69:80]
440 return s
[54] + s
[77:54:-1] + s
[39] + s
[53:39:-1] + s
[78] + s
[38:34:-1] + s
[0] + s
[33:29:-1] + s
[34] + s
[28:9:-1] + s
[29] + s
[8:0:-1] + s
[9]
443 raise ExtractorError(u
'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s
)))
445 def _decrypt_signature_age_gate(self
, s
):
446 # The videos with age protection use another player, so the algorithms
449 return s
[2:63] + s
[82] + s
[64:82] + s
[63]
451 # Fallback to the other algortihms
452 return self
._decrypt
_signature
(s
)
455 def _get_available_subtitles(self
, video_id
):
456 self
.report_video_subtitles_download(video_id
)
457 request
= compat_urllib_request
.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
)
459 sub_list
= compat_urllib_request
.urlopen(request
).read().decode('utf-8')
460 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
461 self
._downloader
.report_warning(u
'unable to download video subtitles: %s' % compat_str(err
))
463 sub_lang_list
= re
.findall(r
'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list
)
464 sub_lang_list
= dict((l
[1], l
[0]) for l
in sub_lang_list
)
465 if not sub_lang_list
:
466 self
._downloader
.report_warning(u
'video doesn\'t have subtitles')
470 def _list_available_subtitles(self
, video_id
):
471 sub_lang_list
= self
._get
_available
_subtitles
(video_id
)
472 self
.report_video_subtitles_available(video_id
, sub_lang_list
)
474 def _request_subtitle(self
, sub_lang
, sub_name
, video_id
, format
):
476 Return the subtitle as a string or None if they are not found
478 self
.report_video_subtitles_request(video_id
, sub_lang
, format
)
479 params
= compat_urllib_parse
.urlencode({
485 url
= 'http://www.youtube.com/api/timedtext?' + params
487 sub
= compat_urllib_request
.urlopen(url
).read().decode('utf-8')
488 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
489 self
._downloader
.report_warning(u
'unable to download video subtitles for %s: %s' % (sub_lang
, compat_str(err
)))
492 self
._downloader
.report_warning(u
'Did not fetch video subtitles')
496 def _request_automatic_caption(self
, video_id
, webpage
):
497 """We need the webpage for getting the captions url, pass it as an
498 argument to speed up the process."""
499 sub_lang
= (self
._downloader
.params
.get('subtitleslangs') or ['en'])[0]
500 sub_format
= self
._downloader
.params
.get('subtitlesformat')
501 self
.to_screen(u
'%s: Looking for automatic captions' % video_id
)
502 mobj
= re
.search(r
';ytplayer.config = ({.*?});', webpage
)
503 err_msg
= u
'Couldn\'t find automatic captions for "%s"' % sub_lang
505 self
._downloader
.report_warning(err_msg
)
507 player_config
= json
.loads(mobj
.group(1))
509 args
= player_config
[u
'args']
510 caption_url
= args
[u
'ttsurl']
511 timestamp
= args
[u
'timestamp']
512 params
= compat_urllib_parse
.urlencode({
519 subtitles_url
= caption_url
+ '&' + params
520 sub
= self
._download
_webpage
(subtitles_url
, video_id
, u
'Downloading automatic captions')
521 return {sub_lang
: sub
}
522 # An extractor error can be raise by the download process if there are
523 # no automatic captions but there are subtitles
524 except (KeyError, ExtractorError
):
525 self
._downloader
.report_warning(err_msg
)
528 def _extract_subtitles(self
, video_id
):
530 Return a dictionary: {language: subtitles} or {} if the subtitles
533 available_subs_list
= self
._get
_available
_subtitles
(video_id
)
534 sub_format
= self
._downloader
.params
.get('subtitlesformat')
535 if not available_subs_list
: #There was some error, it didn't get the available subtitles
537 if self
._downloader
.params
.get('allsubtitles', False):
538 sub_lang_list
= available_subs_list
540 if self
._downloader
.params
.get('subtitleslangs', False):
541 reqested_langs
= self
._downloader
.params
.get('subtitleslangs')
542 elif 'en' in available_subs_list
:
543 reqested_langs
= ['en']
545 reqested_langs
= [list(available_subs_list
.keys())[0]]
548 for sub_lang
in reqested_langs
:
549 if not sub_lang
in available_subs_list
:
550 self
._downloader
.report_warning(u
'no closed captions found in the specified language "%s"' % sub_lang
)
552 sub_lang_list
[sub_lang
] = available_subs_list
[sub_lang
]
554 for sub_lang
in sub_lang_list
:
555 subtitle
= self
._request
_subtitle
(sub_lang
, sub_lang_list
[sub_lang
].encode('utf-8'), video_id
, sub_format
)
557 subtitles
[sub_lang
] = subtitle
560 def _print_formats(self
, formats
):
561 print('Available formats:')
563 print('%s\t:\t%s\t[%s]%s' %(x
, self
._video
_extensions
.get(x
, 'flv'),
564 self
._video
_dimensions
.get(x
, '???'),
565 ' ('+self
._special
_itags
[x
]+')' if x
in self
._special
_itags
else ''))
567 def _extract_id(self
, url
):
568 mobj
= re
.match(self
._VALID
_URL
, url
, re
.VERBOSE
)
570 raise ExtractorError(u
'Invalid URL: %s' % url
)
571 video_id
= mobj
.group(2)
574 def _get_video_url_list(self
, url_map
):
576 Transform a dictionary in the format {itag:url} to a list of (itag, url)
577 with the requested formats.
579 req_format
= self
._downloader
.params
.get('format', None)
580 format_limit
= self
._downloader
.params
.get('format_limit', None)
581 available_formats
= self
._available
_formats
_prefer
_free
if self
._downloader
.params
.get('prefer_free_formats', False) else self
._available
_formats
582 if format_limit
is not None and format_limit
in available_formats
:
583 format_list
= available_formats
[available_formats
.index(format_limit
):]
585 format_list
= available_formats
586 existing_formats
= [x
for x
in format_list
if x
in url_map
]
587 if len(existing_formats
) == 0:
588 raise ExtractorError(u
'no known formats available for video')
589 if self
._downloader
.params
.get('listformats', None):
590 self
._print
_formats
(existing_formats
)
592 if req_format
is None or req_format
== 'best':
593 video_url_list
= [(existing_formats
[0], url_map
[existing_formats
[0]])] # Best quality
594 elif req_format
== 'worst':
595 video_url_list
= [(existing_formats
[-1], url_map
[existing_formats
[-1]])] # worst quality
596 elif req_format
in ('-1', 'all'):
597 video_url_list
= [(f
, url_map
[f
]) for f
in existing_formats
] # All formats
599 # Specific formats. We pick the first in a slash-delimeted sequence.
600 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
601 req_formats
= req_format
.split('/')
602 video_url_list
= None
603 for rf
in req_formats
:
605 video_url_list
= [(rf
, url_map
[rf
])]
607 if video_url_list
is None:
608 raise ExtractorError(u
'requested format not available')
609 return video_url_list
611 def _extract_from_m3u8(self
, manifest_url
, video_id
):
613 def _get_urls(_manifest
):
614 lines
= _manifest
.split('\n')
615 urls
= filter(lambda l
: l
and not l
.startswith('#'),
618 manifest
= self
._download
_webpage
(manifest_url
, video_id
, u
'Downloading formats manifest')
619 formats_urls
= _get_urls(manifest
)
620 for format_url
in formats_urls
:
621 itag
= self
._search
_regex
(r
'itag/(\d+?)/', format_url
, 'itag')
622 url_map
[itag
] = format_url
625 def _real_extract(self
, url
):
626 if re
.match(r
'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url
):
627 self
._downloader
.report_warning(u
'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
629 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
630 mobj
= re
.search(self
._NEXT
_URL
_RE
, url
)
632 url
= 'https://www.youtube.com/' + compat_urllib_parse
.unquote(mobj
.group(1)).lstrip('/')
633 video_id
= self
._extract
_id
(url
)
636 self
.report_video_webpage_download(video_id
)
637 url
= 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
638 request
= compat_urllib_request
.Request(url
)
640 video_webpage_bytes
= compat_urllib_request
.urlopen(request
).read()
641 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
642 raise ExtractorError(u
'Unable to download video webpage: %s' % compat_str(err
))
644 video_webpage
= video_webpage_bytes
.decode('utf-8', 'ignore')
646 # Attempt to extract SWF player URL
647 mobj
= re
.search(r
'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
)
649 player_url
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1))
654 self
.report_video_info_webpage_download(video_id
)
655 if re
.search(r
'player-age-gate-content">', video_webpage
) is not None:
656 self
.report_age_confirmation()
658 # We simulate the access to the video from www.youtube.com/v/{video_id}
659 # this can be viewed without login into Youtube
660 data
= compat_urllib_parse
.urlencode({'video_id': video_id
,
664 'eurl': 'https://youtube.googleapis.com/v/' + video_id
,
668 video_info_url
= 'https://www.youtube.com/get_video_info?' + data
669 video_info_webpage
= self
._download
_webpage
(video_info_url
, video_id
,
671 errnote
='unable to download video info webpage')
672 video_info
= compat_parse_qs(video_info_webpage
)
675 for el_type
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
676 video_info_url
= ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
677 % (video_id
, el_type
))
678 video_info_webpage
= self
._download
_webpage
(video_info_url
, video_id
,
680 errnote
='unable to download video info webpage')
681 video_info
= compat_parse_qs(video_info_webpage
)
682 if 'token' in video_info
:
684 if 'token' not in video_info
:
685 if 'reason' in video_info
:
686 raise ExtractorError(u
'YouTube said: %s' % video_info
['reason'][0], expected
=True)
688 raise ExtractorError(u
'"token" parameter not in video info for unknown reason')
690 # Check for "rental" videos
691 if 'ypc_video_rental_bar_text' in video_info
and 'author' not in video_info
:
692 raise ExtractorError(u
'"rental" videos not supported')
694 # Start extracting information
695 self
.report_information_extraction(video_id
)
698 if 'author' not in video_info
:
699 raise ExtractorError(u
'Unable to extract uploader name')
700 video_uploader
= compat_urllib_parse
.unquote_plus(video_info
['author'][0])
703 video_uploader_id
= None
704 mobj
= re
.search(r
'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage
)
706 video_uploader_id
= mobj
.group(1)
708 self
._downloader
.report_warning(u
'unable to extract uploader nickname')
711 if 'title' not in video_info
:
712 raise ExtractorError(u
'Unable to extract video title')
713 video_title
= compat_urllib_parse
.unquote_plus(video_info
['title'][0])
716 # We try first to get a high quality image:
717 m_thumb
= re
.search(r
'<span itemprop="thumbnail".*?href="(.*?)">',
718 video_webpage
, re
.DOTALL
)
719 if m_thumb
is not None:
720 video_thumbnail
= m_thumb
.group(1)
721 elif 'thumbnail_url' not in video_info
:
722 self
._downloader
.report_warning(u
'unable to extract video thumbnail')
724 else: # don't panic if we can't find it
725 video_thumbnail
= compat_urllib_parse
.unquote_plus(video_info
['thumbnail_url'][0])
729 mobj
= re
.search(r
'id="eow-date.*?>(.*?)</span>', video_webpage
, re
.DOTALL
)
731 upload_date
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split())
732 upload_date
= unified_strdate(upload_date
)
735 video_description
= get_element_by_id("eow-description", video_webpage
)
736 if video_description
:
737 video_description
= clean_html(video_description
)
739 fd_mobj
= re
.search(r
'<meta name="description" content="([^"]+)"', video_webpage
)
741 video_description
= unescapeHTML(fd_mobj
.group(1))
743 video_description
= u
''
746 video_subtitles
= None
748 if self
._downloader
.params
.get('writesubtitles', False) or self
._downloader
.params
.get('allsubtitles', False):
749 video_subtitles
= self
._extract
_subtitles
(video_id
)
750 elif self
._downloader
.params
.get('writeautomaticsub', False):
751 video_subtitles
= self
._request
_automatic
_caption
(video_id
, video_webpage
)
753 if self
._downloader
.params
.get('listsubtitles', False):
754 self
._list
_available
_subtitles
(video_id
)
757 if 'length_seconds' not in video_info
:
758 self
._downloader
.report_warning(u
'unable to extract video duration')
761 video_duration
= compat_urllib_parse
.unquote_plus(video_info
['length_seconds'][0])
763 # Decide which formats to download
766 mobj
= re
.search(r
';ytplayer.config = ({.*?});', video_webpage
)
768 raise ValueError('Could not find vevo ID')
769 info
= json
.loads(mobj
.group(1))
771 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
772 # this signatures are encrypted
773 m_s
= re
.search(r
'[&,]s=', args
['url_encoded_fmt_stream_map'])
775 self
.to_screen(u
'%s: Encrypted signatures detected.' % video_id
)
776 video_info
['url_encoded_fmt_stream_map'] = [args
['url_encoded_fmt_stream_map']]
777 m_s
= re
.search(r
'[&,]s=', args
.get('adaptive_fmts', u
''))
779 if 'url_encoded_fmt_stream_map' in video_info
:
780 video_info
['url_encoded_fmt_stream_map'][0] += ',' + args
['adaptive_fmts']
782 video_info
['url_encoded_fmt_stream_map'] = [args
['adaptive_fmts']]
783 elif 'adaptive_fmts' in video_info
:
784 if 'url_encoded_fmt_stream_map' in video_info
:
785 video_info
['url_encoded_fmt_stream_map'][0] += ',' + video_info
['adaptive_fmts'][0]
787 video_info
['url_encoded_fmt_stream_map'] = video_info
['adaptive_fmts']
791 if 'conn' in video_info
and video_info
['conn'][0].startswith('rtmp'):
792 self
.report_rtmp_download()
793 video_url_list
= [(None, video_info
['conn'][0])]
794 elif 'url_encoded_fmt_stream_map' in video_info
and len(video_info
['url_encoded_fmt_stream_map']) >= 1:
795 if 'rtmpe%3Dyes' in video_info
['url_encoded_fmt_stream_map'][0]:
796 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected
=True)
798 for url_data_str
in video_info
['url_encoded_fmt_stream_map'][0].split(','):
799 url_data
= compat_parse_qs(url_data_str
)
800 if 'itag' in url_data
and 'url' in url_data
:
801 url
= url_data
['url'][0]
802 if 'sig' in url_data
:
803 url
+= '&signature=' + url_data
['sig'][0]
804 elif 's' in url_data
:
805 if self
._downloader
.params
.get('verbose'):
808 player_version
= self
._search
_regex
(r
'ad3-(.+?)\.swf',
809 video_info
['ad3_module'][0] if 'ad3_module' in video_info
else 'NOT FOUND',
810 'flash player', fatal
=False)
811 player
= 'flash player %s' % player_version
813 player
= u
'html5 player %s' % self
._search
_regex
(r
'html5player-(.+?)\.js', video_webpage
,
814 'html5 player', fatal
=False)
815 parts_sizes
= u
'.'.join(compat_str(len(part
)) for part
in s
.split('.'))
816 self
.to_screen(u
'encrypted signature length %d (%s), itag %s, %s' %
817 (len(s
), parts_sizes
, url_data
['itag'][0], player
))
818 encrypted_sig
= url_data
['s'][0]
820 signature
= self
._decrypt
_signature
_age
_gate
(encrypted_sig
)
822 signature
= self
._decrypt
_signature
(encrypted_sig
)
823 url
+= '&signature=' + signature
824 if 'ratebypass' not in url
:
825 url
+= '&ratebypass=yes'
826 url_map
[url_data
['itag'][0]] = url
827 video_url_list
= self
._get
_video
_url
_list
(url_map
)
828 if not video_url_list
:
830 elif video_info
.get('hlsvp'):
831 manifest_url
= video_info
['hlsvp'][0]
832 url_map
= self
._extract
_from
_m
3u8(manifest_url
, video_id
)
833 video_url_list
= self
._get
_video
_url
_list
(url_map
)
834 if not video_url_list
:
838 raise ExtractorError(u
'no conn or url_encoded_fmt_stream_map information found in video info')
841 for format_param
, video_real_url
in video_url_list
:
843 video_extension
= self
._video
_extensions
.get(format_param
, 'flv')
845 video_format
= '{0} - {1}{2}'.format(format_param
if format_param
else video_extension
,
846 self
._video
_dimensions
.get(format_param
, '???'),
847 ' ('+self
._special
_itags
[format_param
]+')' if format_param
in self
._special
_itags
else '')
851 'url': video_real_url
,
852 'uploader': video_uploader
,
853 'uploader_id': video_uploader_id
,
854 'upload_date': upload_date
,
855 'title': video_title
,
856 'ext': video_extension
,
857 'format': video_format
,
858 'thumbnail': video_thumbnail
,
859 'description': video_description
,
860 'player_url': player_url
,
861 'subtitles': video_subtitles
,
862 'duration': video_duration
866 class YoutubePlaylistIE(InfoExtractor
):
867 IE_DESC
= u
'YouTube.com playlists'
873 (?:course|view_play_list|my_playlists|artist|playlist|watch)
874 \? (?:.*?&)*? (?:p|a|list)=
877 ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
880 ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
882 _TEMPLATE_URL
= 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
884 IE_NAME
= u
'youtube:playlist'
887 def suitable(cls
, url
):
888 """Receives a URL and returns True if suitable for this IE."""
889 return re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
) is not None
891 def _real_extract(self
, url
):
892 # Extract playlist id
893 mobj
= re
.match(self
._VALID
_URL
, url
, re
.VERBOSE
)
895 raise ExtractorError(u
'Invalid URL: %s' % url
)
897 # Download playlist videos from API
898 playlist_id
= mobj
.group(1) or mobj
.group(2)
901 for page_num
in itertools
.count(1):
902 start_index
= self
._MAX
_RESULTS
* (page_num
- 1) + 1
903 if start_index
>= 1000:
904 self
._downloader
.report_warning(u
'Max number of results reached')
906 url
= self
._TEMPLATE
_URL
% (playlist_id
, self
._MAX
_RESULTS
, start_index
)
907 page
= self
._download
_webpage
(url
, playlist_id
, u
'Downloading page #%s' % page_num
)
910 response
= json
.loads(page
)
911 except ValueError as err
:
912 raise ExtractorError(u
'Invalid JSON in API response: ' + compat_str(err
))
914 if 'feed' not in response
:
915 raise ExtractorError(u
'Got a malformed response from YouTube API')
916 playlist_title
= response
['feed']['title']['$t']
917 if 'entry' not in response
['feed']:
918 # Number of videos is a multiple of self._MAX_RESULTS
921 for entry
in response
['feed']['entry']:
922 index
= entry
['yt$position']['$t']
923 if 'media$group' in entry
and 'media$player' in entry
['media$group']:
924 videos
.append((index
, entry
['media$group']['media$player']['url']))
926 videos
= [v
[1] for v
in sorted(videos
)]
928 url_results
= [self
.url_result(vurl
, 'Youtube') for vurl
in videos
]
929 return [self
.playlist_result(url_results
, playlist_id
, playlist_title
)]
932 class YoutubeChannelIE(InfoExtractor
):
933 IE_DESC
= u
'YouTube.com channels'
934 _VALID_URL
= r
"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
935 _TEMPLATE_URL
= 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
936 _MORE_PAGES_INDICATOR
= 'yt-uix-load-more'
937 _MORE_PAGES_URL
= 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
938 IE_NAME
= u
'youtube:channel'
940 def extract_videos_from_page(self
, page
):
942 for mobj
in re
.finditer(r
'href="/watch\?v=([0-9A-Za-z_-]+)&?', page
):
943 if mobj
.group(1) not in ids_in_page
:
944 ids_in_page
.append(mobj
.group(1))
947 def _real_extract(self
, url
):
949 mobj
= re
.match(self
._VALID
_URL
, url
)
951 raise ExtractorError(u
'Invalid URL: %s' % url
)
953 # Download channel page
954 channel_id
= mobj
.group(1)
958 url
= self
._TEMPLATE
_URL
% (channel_id
, pagenum
)
959 page
= self
._download
_webpage
(url
, channel_id
,
960 u
'Downloading page #%s' % pagenum
)
962 # Extract video identifiers
963 ids_in_page
= self
.extract_videos_from_page(page
)
964 video_ids
.extend(ids_in_page
)
966 # Download any subsequent channel pages using the json-based channel_ajax query
967 if self
._MORE
_PAGES
_INDICATOR
in page
:
968 for pagenum
in itertools
.count(1):
969 url
= self
._MORE
_PAGES
_URL
% (pagenum
, channel_id
)
970 page
= self
._download
_webpage
(url
, channel_id
,
971 u
'Downloading page #%s' % pagenum
)
973 page
= json
.loads(page
)
975 ids_in_page
= self
.extract_videos_from_page(page
['content_html'])
976 video_ids
.extend(ids_in_page
)
978 if self
._MORE
_PAGES
_INDICATOR
not in page
['load_more_widget_html']:
981 self
._downloader
.to_screen(u
'[youtube] Channel %s: Found %i videos' % (channel_id
, len(video_ids
)))
983 urls
= ['http://www.youtube.com/watch?v=%s' % id for id in video_ids
]
984 url_entries
= [self
.url_result(eurl
, 'Youtube') for eurl
in urls
]
985 return [self
.playlist_result(url_entries
, channel_id
)]
988 class YoutubeUserIE(InfoExtractor
):
989 IE_DESC
= u
'YouTube.com user videos (URL or "ytuser" keyword)'
990 _VALID_URL
= r
'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
991 _TEMPLATE_URL
= 'http://gdata.youtube.com/feeds/api/users/%s'
992 _GDATA_PAGE_SIZE
= 50
993 _GDATA_URL
= 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
994 _VIDEO_INDICATOR
= r
'/watch\?v=(.+?)[\<&]'
995 IE_NAME
= u
'youtube:user'
997 def _real_extract(self
, url
):
999 mobj
= re
.match(self
._VALID
_URL
, url
)
1001 raise ExtractorError(u
'Invalid URL: %s' % url
)
1003 username
= mobj
.group(1)
1005 # Download video ids using YouTube Data API. Result size per
1006 # query is limited (currently to 50 videos) so we need to query
1007 # page by page until there are no video ids - it means we got
1012 for pagenum
in itertools
.count(0):
1013 start_index
= pagenum
* self
._GDATA
_PAGE
_SIZE
+ 1
1015 gdata_url
= self
._GDATA
_URL
% (username
, self
._GDATA
_PAGE
_SIZE
, start_index
)
1016 page
= self
._download
_webpage
(gdata_url
, username
,
1017 u
'Downloading video ids from %d to %d' % (start_index
, start_index
+ self
._GDATA
_PAGE
_SIZE
))
1019 # Extract video identifiers
1022 for mobj
in re
.finditer(self
._VIDEO
_INDICATOR
, page
):
1023 if mobj
.group(1) not in ids_in_page
:
1024 ids_in_page
.append(mobj
.group(1))
1026 video_ids
.extend(ids_in_page
)
1028 # A little optimization - if current page is not
1029 # "full", ie. does not contain PAGE_SIZE video ids then
1030 # we can assume that this page is the last one - there
1031 # are no more ids on further pages - no need to query
1034 if len(ids_in_page
) < self
._GDATA
_PAGE
_SIZE
:
1037 urls
= ['http://www.youtube.com/watch?v=%s' % video_id
for video_id
in video_ids
]
1038 url_results
= [self
.url_result(rurl
, 'Youtube') for rurl
in urls
]
1039 return [self
.playlist_result(url_results
, playlist_title
= username
)]
1041 class YoutubeSearchIE(SearchInfoExtractor
):
1042 IE_DESC
= u
'YouTube.com searches'
1043 _API_URL
= 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
1045 IE_NAME
= u
'youtube:search'
1046 _SEARCH_KEY
= 'ytsearch'
1048 def report_download_page(self
, query
, pagenum
):
1049 """Report attempt to download search page with given number."""
1050 self
._downloader
.to_screen(u
'[youtube] query "%s": Downloading page %s' % (query
, pagenum
))
1052 def _get_n_results(self
, query
, n
):
1053 """Get a specified number of results for a query"""
1059 while (50 * pagenum
) < limit
:
1060 self
.report_download_page(query
, pagenum
+1)
1061 result_url
= self
._API
_URL
% (compat_urllib_parse
.quote_plus(query
), (50*pagenum
)+1)
1062 request
= compat_urllib_request
.Request(result_url
)
1064 data
= compat_urllib_request
.urlopen(request
).read().decode('utf-8')
1065 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
1066 raise ExtractorError(u
'Unable to download API page: %s' % compat_str(err
))
1067 api_response
= json
.loads(data
)['data']
1069 if not 'items' in api_response
:
1070 raise ExtractorError(u
'[youtube] No video results')
1072 new_ids
= list(video
['id'] for video
in api_response
['items'])
1073 video_ids
+= new_ids
1075 limit
= min(n
, api_response
['totalItems'])
1078 if len(video_ids
) > n
:
1079 video_ids
= video_ids
[:n
]
1080 videos
= [self
.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids
]
1081 return self
.playlist_result(videos
, query
)
1084 class YoutubeShowIE(InfoExtractor
):
1085 IE_DESC
= u
'YouTube.com (multi-season) shows'
1086 _VALID_URL
= r
'https?://www\.youtube\.com/show/(.*)'
1087 IE_NAME
= u
'youtube:show'
1089 def _real_extract(self
, url
):
1090 mobj
= re
.match(self
._VALID
_URL
, url
)
1091 show_name
= mobj
.group(1)
1092 webpage
= self
._download
_webpage
(url
, show_name
, u
'Downloading show webpage')
1093 # There's one playlist for each season of the show
1094 m_seasons
= list(re
.finditer(r
'href="(/playlist\?list=.*?)"', webpage
))
1095 self
.to_screen(u
'%s: Found %s seasons' % (show_name
, len(m_seasons
)))
1096 return [self
.url_result('https://www.youtube.com' + season
.group(1), 'YoutubePlaylist') for season
in m_seasons
]
1099 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor
):
1101 Base class for extractors that fetch info from
1102 http://www.youtube.com/feed_ajax
1103 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
1105 _LOGIN_REQUIRED
= True
1107 # use action_load_personal_feed instead of action_load_system_feed
1108 _PERSONAL_FEED
= False
1111 def _FEED_TEMPLATE(self
):
1112 action
= 'action_load_system_feed'
1113 if self
._PERSONAL
_FEED
:
1114 action
= 'action_load_personal_feed'
1115 return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action
, self
._FEED
_NAME
)
1119 return u
'youtube:%s' % self
._FEED
_NAME
1121 def _real_initialize(self
):
1124 def _real_extract(self
, url
):
1126 # The step argument is available only in 2.7 or higher
1127 for i
in itertools
.count(0):
1128 paging
= i
*self
._PAGING
_STEP
1129 info
= self
._download
_webpage
(self
._FEED
_TEMPLATE
% paging
,
1130 u
'%s feed' % self
._FEED
_NAME
,
1131 u
'Downloading page %s' % i
)
1132 info
= json
.loads(info
)
1133 feed_html
= info
['feed_html']
1134 m_ids
= re
.finditer(r
'"/watch\?v=(.*?)["&]', feed_html
)
1135 ids
= orderedSet(m
.group(1) for m
in m_ids
)
1136 feed_entries
.extend(self
.url_result(id, 'Youtube') for id in ids
)
1137 if info
['paging'] is None:
1139 return self
.playlist_result(feed_entries
, playlist_title
=self
._PLAYLIST
_TITLE
)
1141 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor
):
1142 IE_DESC
= u
'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
1143 _VALID_URL
= r
'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1144 _FEED_NAME
= 'subscriptions'
1145 _PLAYLIST_TITLE
= u
'Youtube Subscriptions'
1147 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor
):
1148 IE_DESC
= u
'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
1149 _VALID_URL
= r
'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1150 _FEED_NAME
= 'recommended'
1151 _PLAYLIST_TITLE
= u
'Youtube Recommended videos'
1153 class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor
):
1154 IE_DESC
= u
'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
1155 _VALID_URL
= r
'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
1156 _FEED_NAME
= 'watch_later'
1157 _PLAYLIST_TITLE
= u
'Youtube Watch Later'
1159 _PERSONAL_FEED
= True
1161 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor
):
1162 IE_NAME
= u
'youtube:favorites'
1163 IE_DESC
= u
'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
1164 _VALID_URL
= r
'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
1165 _LOGIN_REQUIRED
= True
1167 def _real_extract(self
, url
):
1168 webpage
= self
._download
_webpage
('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
1169 playlist_id
= self
._search
_regex
(r
'list=(.+?)["&]', webpage
, u
'favourites playlist id')
1170 return self
.url_result(playlist_id
, 'YoutubePlaylist')