class YoutubeIE(InfoExtractor):
- """Information extractor for youtube.com."""
-
+ IE_DESC = u'YouTube.com'
_VALID_URL = r"""^
(
(?:https?://)? # http(s):// (optional)
(?: # the various things that can precede the ID:
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|(?: # or the v= param in all its forms
- (?:watch(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+ (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch or movie, each optionally followed by _popup and/or .php, or nothing (like /?v=xxxx)
(?:\?|\#!?) # the params delimiter ? or # or #!
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
v=
'18': 'mp4',
'22': 'mp4',
'37': 'mp4',
- '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
+ '38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': '1080x1920',
}
IE_NAME = u'youtube'
+ _TESTS = [
+ {
+ u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
+ u"file": u"BaW_jenozKc.mp4",
+ u"info_dict": {
+ u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
+ u"uploader": u"Philipp Hagemeister",
+ u"uploader_id": u"phihag",
+ u"upload_date": u"20121002",
+ u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+ }
+ },
+ {
+ u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
+ u"file": u"1ltcDfZMA3U.flv",
+ u"note": u"Test VEVO video (#897)",
+ u"info_dict": {
+ u"upload_date": u"20070518",
+ u"title": u"Maps - It Will Find You",
+ u"description": u"Music video by Maps performing It Will Find You.",
+ u"uploader": u"MuteUSA",
+ u"uploader_id": u"MuteUSA"
+ }
+ },
+ {
+ u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
+ u"file": u"UxxajLWwzqY.mp4",
+ u"note": u"Test generic use_cipher_signature video (#897)",
+ u"info_dict": {
+ u"upload_date": u"20120506",
+ u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
+ u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
+ u"uploader": u"IconaPop",
+ u"uploader_id": u"IconaPop"
+ }
+ }
+ ]
+
@classmethod
def suitable(cls, url):
"""Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected')
- @staticmethod
- def _decrypt_signature(s):
- """Decrypt the key the two subkeys must have a length of 43"""
- (a,b) = s.split('.')
- if len(a) != 43 or len(b) != 43:
- raise ExtractorError(u'Unable to decrypt signature, subkeys lengths not valid')
- b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40]
- a = a[-40:]
- s_dec = '.'.join((a,b))[::-1]
- return s_dec
+ def _decrypt_signature(self, s):
+ """Turn the encrypted s field into a working signature.
+
+ The length of s selects one fixed rearrangement of its characters:
+ slices, reversed slices and single characters of s are concatenated
+ in a hard-coded order and the resulting string is returned.
+
+ Raises ExtractorError when len(s) is not one of 82 to 88.
+ """
+
+ # Each branch handles exactly one input length. The index constants are
+ # position-sensitive: changing any of them changes the produced signature.
+ # NOTE(review): the constants presumably mirror the site's player code - confirm before editing.
+ if len(s) == 88:
+ return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+ elif len(s) == 87:
+ return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
+ elif len(s) == 86:
+ return s[2:63] + s[82] + s[64:82] + s[63]
+ elif len(s) == 85:
+ return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
+ elif len(s) == 84:
+ return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
+ elif len(s) == 83:
+ return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36]
+ elif len(s) == 82:
+ return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
+
+ else:
+ # Any other length has no known rearrangement; the message tells the user a retry may help.
+ raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _get_available_subtitles(self, video_id):
self.report_video_subtitles_download(video_id)
return video_id
def _real_extract(self, url):
+ if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
+ self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
+
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
break
if 'token' not in video_info:
if 'reason' in video_info:
- raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0])
+ raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
else:
raise ExtractorError(u'"token" parameter not in video info for unknown reason')
if video_subtitles:
(sub_error, sub_lang, sub) = video_subtitles[0]
if sub_error:
- # We try with the automatic captions
- video_subtitles = self._request_automatic_caption(video_id, video_webpage)
- (sub_error_auto, sub_lang, sub) = video_subtitles[0]
- if sub is not None:
- pass
- else:
- # We report the original error
- self._downloader.report_warning(sub_error)
+ self._downloader.report_warning(sub_error)
+
+ if self._downloader.params.get('writeautomaticsub', False):
+ video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+ (sub_error, sub_lang, sub) = video_subtitles[0]
+ if sub_error:
+ self._downloader.report_warning(sub_error)
if self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_all_subtitles(video_id)
try:
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
+ if not mobj:
+ raise ValueError('Could not find vevo ID')
info = json.loads(mobj.group(1))
args = info['args']
- if args.get('ptk','') == 'vevo' or 'dashmpd':
- # Vevo videos with encrypted signatures
- self.to_screen(u'%s: Vevo video detected.' % video_id)
+ # Easy way to know if the 's' value is in url_encoded_fmt_stream_map:
+ # if it is present, the signatures are encrypted
+ m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
+ if m_s is not None:
+ self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
except ValueError:
pass
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
+ if self._downloader.params.get('verbose'):
+ # Diagnostic output only: it must never abort the extraction itself.
+ s = url_data['s'][0]
+ player = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
+ 'html5 player', fatal=False)
+ # Report the length of every dot-separated part instead of indexing parts [0] and [1],
+ # so a signature without a dot cannot raise IndexError; two-part signatures print as before.
+ parts_sizes = '.'.join(str(len(part)) for part in s.split('.'))
+ self.to_screen('encrypted signature length %d (%s), itag %s, html5 player %s' %
+ (len(s), parts_sizes, url_data['itag'][0], player))
signature = self._decrypt_signature(url_data['s'][0])
url += '&signature=' + signature
if 'ratebypass' not in url:
if req_format is None or req_format == 'best':
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
- video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
+ video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
elif req_format in ('-1', 'all'):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
return results
class YoutubePlaylistIE(InfoExtractor):
- """Information Extractor for YouTube playlists."""
-
+ IE_DESC = u'YouTube.com playlists'
_VALID_URL = r"""(?:
(?:https?://)?
(?:\w+\.)?
class YoutubeChannelIE(InfoExtractor):
- """Information Extractor for YouTube channels."""
-
+ IE_DESC = u'YouTube.com channels'
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
_TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
class YoutubeUserIE(InfoExtractor):
- """Information Extractor for YouTube users."""
-
+ IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
_GDATA_PAGE_SIZE = 50
return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
- """Information Extractor for YouTube search queries."""
+ IE_DESC = u'YouTube.com searches'
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
_MAX_RESULTS = 1000
IE_NAME = u'youtube:search'
video_ids = video_ids[:n]
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
return self.playlist_result(videos, query)
+
+
+class YoutubeShowIE(InfoExtractor):
+ # Extractor for show pages: every playlist link found on the page is passed on as one season.
+ IE_DESC = u'YouTube.com (multi-season) shows'
+ IE_NAME = u'youtube:show'
+ _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
+
+ def _real_extract(self, url):
+ # The show name is whatever follows /show/ in the URL
+ show_name = re.match(self._VALID_URL, url).group(1)
+ webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
+ # Each season of the show is published as its own playlist
+ season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
+ self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
+ return [self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist') for path in season_paths]