X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/421b7b220ea958384617bd7d339f188bf601280e..725f8b2d4cc43b8401946dfed69d22cfe40810fc:/youtube_dl/extractor/youtube.py?ds=sidebyside
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9943ddd..e7bd1f1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -87,7 +87,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
- if self._LOGIN_REQUIRED:
+ if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return True
@@ -1596,6 +1596,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'token' not in video_info:
video_info = get_video_info
break
+
+ def extract_unavailable_message():
+ return self._html_search_regex(
+ r'(?s)
]+id="unavailable-message"[^>]*>(.+?)
',
+ video_webpage, 'unavailable message', default=None)
+
if 'token' not in video_info:
if 'reason' in video_info:
if 'The uploader has not made this video available in your country.' in video_info['reason']:
@@ -1604,8 +1610,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
countries = regions_allowed.split(',') if regions_allowed else None
self.raise_geo_restricted(
msg=video_info['reason'][0], countries=countries)
+ reason = video_info['reason'][0]
+ if 'Invalid parameters' in reason:
+ unavailable_message = extract_unavailable_message()
+ if unavailable_message:
+ reason = unavailable_message
raise ExtractorError(
- 'YouTube said: %s' % video_info['reason'][0],
+ 'YouTube said: %s' % reason,
expected=True, video_id=video_id)
else:
raise ExtractorError(
@@ -1810,7 +1821,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': video_info['conn'][0],
'player_url': player_url,
}]
- elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
+ elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
@@ -1933,6 +1944,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
if codecs:
dct.update(parse_codecs(codecs))
+ if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
+ dct['downloader_options'] = {
+ # Youtube throttles chunks >~10M
+ 'http_chunk_size': 10485760,
+ }
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
@@ -1953,9 +1969,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
formats.append(a_format)
else:
- unavailable_message = self._html_search_regex(
- r'(?s)]+id="unavailable-message"[^>]*>(.+?)
',
- video_webpage, 'unavailable message', default=None)
+ unavailable_message = extract_unavailable_message()
if unavailable_message:
raise ExtractorError(unavailable_message, expected=True)
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
@@ -2270,6 +2284,19 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
r'(?s)',
page, 'title', default=None)
+ _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*\s*]+\bhref='
+ uploader = self._search_regex(
+ r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
+ page, 'uploader', default=None)
+ mobj = re.search(
+ r'%s(["\'])(?P/(?:user|channel)/(?P.+?))\1' % _UPLOADER_BASE,
+ page)
+ if mobj:
+ uploader_id = mobj.group('uploader_id')
+ uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
+ else:
+ uploader_id = uploader_url = None
+
has_videos = True
if not playlist_title:
@@ -2280,8 +2307,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
except StopIteration:
has_videos = False
- return has_videos, self.playlist_result(
+ playlist = self.playlist_result(
self._entries(page, playlist_id), playlist_id, playlist_title)
+ playlist.update({
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ })
+
+ return has_videos, playlist
def _check_download_just_video(self, url, playlist_id):
# Check if it's a video-specific URL
@@ -2417,7 +2451,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
class YoutubeUserIE(YoutubeChannelIE):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?Puser|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P[A-Za-z0-9_-]+)'
+ _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?Puser|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P[A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
IE_NAME = 'youtube:user'
@@ -2510,10 +2544,11 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
webpage = self._download_webpage(url, channel_id, fatal=False)
if webpage:
page_type = self._og_search_property(
- 'type', webpage, 'page type', default=None)
+ 'type', webpage, 'page type', default='')
video_id = self._html_search_meta(
'videoId', webpage, 'video id', default=None)
- if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
+ if page_type.startswith('video') and video_id and re.match(
+ r'^[0-9A-Za-z_-]{11}$', video_id):
return self.url_result(video_id, YoutubeIE.ie_key())
return self.url_result(base_url)
@@ -2548,7 +2583,11 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
}]
-class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
+class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
+ _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P[^"]+))?'
+
+
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results
@@ -2582,8 +2621,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
raise ExtractorError(
'[youtube] No video results', expected=True)
- new_videos = self._ids_to_results(orderedSet(re.findall(
- r'href="/watch\?v=(.{11})', html_content)))
+ new_videos = list(self._process_page(html_content))
videos += new_videos
if not new_videos or len(videos) > limit:
break
@@ -2606,11 +2644,10 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
-class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
+class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P[^&]+)(?:[&]|$)'
- _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P[^"]+))?'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
@@ -2662,10 +2699,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
- def _real_extract(self, url):
- page = self._download_webpage(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
-
+ def _entries(self, page):
# The extraction process is the same as for playlists, but the regex
# for the video ids doesn't contain an index
ids = []
@@ -2676,12 +2710,15 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
# 'recommended' feed has infinite 'load more' and each new portion spins
# the same videos in (sometimes) slightly different order, so we'll check
# for unicity and break when portion has no new videos
- new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
+ new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
if not new_ids:
break
ids.extend(new_ids)
+ for entry in self._ids_to_results(new_ids):
+ yield entry
+
mobj = re.search(r'data-uix-load-more-href="/?(?P[^"]+)"', more_widget_html)
if not mobj:
break
@@ -2693,8 +2730,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
+ def _real_extract(self, url):
+ page = self._download_webpage(
+ 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+ self._PLAYLIST_TITLE)
return self.playlist_result(
- self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
+ self._entries(page), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):