from ..utils import (
bool_or_none,
clean_html,
- dict_get,
error_to_compat_str,
extract_attributes,
ExtractorError,
'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'alt_title': 'I Love It (feat. Charli XCX)',
- 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
+ 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'],
'id': 'nfWlot6h_JM',
'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off',
- 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
+ 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
'duration': 242,
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
- 'creator': 'Taylor Swift',
},
'params': {
'youtube_include_dash_manifest': True,
'upload_date': '20100430',
'uploader_id': 'deadmau5',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
- 'creator': 'deadmau5',
+ 'creator': 'Dada Life, deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)',
- 'alt_title': 'Some Chords',
+ 'alt_title': 'This Machine Kills Some Chords',
},
'expected_warnings': [
'DASH manifest missing',
'skip_download': True,
'youtube_include_dash_manifest': False,
},
+ 'skip': 'not actual anymore',
},
{
# Youtube Music Auto-generated description
'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312',
- 'uploader': 'Various Artists - Topic',
- 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
+ 'uploader': 'Stephen - Topic',
+ 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
'artist': 'Stephen',
'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear',
'id': '-hcAI0g-f5M',
'ext': 'mp4',
'title': 'Put It On Me',
- 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
+ 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
'upload_date': '20180426',
'uploader': 'Matt Maeson - Topic',
'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
- r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
+ r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
player_url)
if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url)
def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
- def extract_token(v_info):
- return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
-
def extract_player_response(player_response, video_id):
pl_response = str_or_none(player_response)
if not pl_response:
player_response = {}
# Get video info
+ video_info = {}
embed_webpage = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
- video_info_webpage = self._download_webpage(
- video_info_url, video_id,
- note='Refetching age-gated info webpage',
- errnote='unable to download video info webpage')
- video_info = compat_parse_qs(video_info_webpage)
- pl_response = video_info.get('player_response', [None])[0]
- player_response = extract_player_response(pl_response, video_id)
- add_dash_mpd(video_info)
- view_count = extract_view_count(video_info)
+ try:
+ video_info_webpage = self._download_webpage(
+ video_info_url, video_id,
+ note='Refetching age-gated info webpage',
+ errnote='unable to download video info webpage')
+ except ExtractorError:
+ video_info_webpage = None
+ if video_info_webpage:
+ video_info = compat_parse_qs(video_info_webpage)
+ pl_response = video_info.get('player_response', [None])[0]
+ player_response = extract_player_response(pl_response, video_id)
+ add_dash_mpd(video_info)
+ view_count = extract_view_count(video_info)
else:
age_gate = False
- video_info = None
- sts = None
# Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
- sts = ytplayer_config.get('sts')
if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id)
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response)
- # We also try looking in get_video_info since it may contain different dashmpd
- # URL that points to a DASH manifest with possibly different itag set (some itags
- # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
- # manifest pointed by get_video_info's dashmpd).
- # The general idea is to take a union of itags of both DASH manifests (for example
- # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
- self.report_video_info_webpage_download(video_id)
- for el in ('embedded', 'detailpage', 'vevo', ''):
- query = {
- 'video_id': video_id,
- 'ps': 'default',
- 'eurl': '',
- 'gl': 'US',
- 'hl': 'en',
- }
- if el:
- query['el'] = el
- if sts:
- query['sts'] = sts
- video_info_webpage = self._download_webpage(
- '%s://www.youtube.com/get_video_info' % proto,
- video_id, note=False,
- errnote='unable to download video info webpage',
- fatal=False, query=query)
- if not video_info_webpage:
- continue
- get_video_info = compat_parse_qs(video_info_webpage)
- if not player_response:
- pl_response = get_video_info.get('player_response', [None])[0]
- player_response = extract_player_response(pl_response, video_id)
- add_dash_mpd(get_video_info)
- if view_count is None:
- view_count = extract_view_count(get_video_info)
- if not video_info:
- video_info = get_video_info
- get_token = extract_token(get_video_info)
- if get_token:
- # Different get_video_info requests may report different results, e.g.
- # some may report video unavailability, but some may serve it without
- # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
- # the original webpage as well as el=info and el=embedded get_video_info
- # requests report video unavailability due to geo restriction while
- # el=detailpage succeeds and returns valid data). This is probably
- # due to YouTube measures against IP ranges of hosting providers.
- # Working around by preferring the first succeeded video_info containing
- # the token if no such video_info yet was found.
- token = extract_token(video_info)
- if not token:
- video_info = get_video_info
- break
def extract_unavailable_message():
messages = []
if messages:
return '\n'.join(messages)
- if not video_info:
+ if not video_info and not player_response:
unavailable_message = extract_unavailable_message()
if not unavailable_message:
unavailable_message = 'Unable to extract video data'
raise ExtractorError(
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
+ if not isinstance(video_info, dict):
+ video_info = {}
+
video_details = try_get(
player_response, lambda x: x['videoDetails'], dict) or {}
else:
player_version = self._search_regex(
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
- r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
+ r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
f['stretched_ratio'] = ratio
if not formats:
- token = extract_token(video_info)
- if not token:
- if 'reason' in video_info:
- if 'The uploader has not made this video available in your country.' in video_info['reason']:
- regions_allowed = self._html_search_meta(
- 'regionsAllowed', video_webpage, default=None)
- countries = regions_allowed.split(',') if regions_allowed else None
- self.raise_geo_restricted(
- msg=video_info['reason'][0], countries=countries)
- reason = video_info['reason'][0]
- if 'Invalid parameters' in reason:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- reason = unavailable_message
- raise ExtractorError(
- 'YouTube said: %s' % reason,
- expected=True, video_id=video_id)
- else:
- raise ExtractorError(
- '"token" parameter not in video info for unknown reason',
- video_id=video_id)
-
- if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if 'reason' in video_info:
+ if 'The uploader has not made this video available in your country.' in video_info['reason']:
+ regions_allowed = self._html_search_meta(
+ 'regionsAllowed', video_webpage, default=None)
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(
+ msg=video_info['reason'][0], countries=countries)
+ reason = video_info['reason'][0]
+ if 'Invalid parameters' in reason:
+ unavailable_message = extract_unavailable_message()
+ if unavailable_message:
+ reason = unavailable_message
+ raise ExtractorError(
+ 'YouTube said: %s' % reason,
+ expected=True, video_id=video_id)
+ if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
+ raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats)
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist'
_TESTS = [{
- 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': {
- 'title': 'ytdl test PL',
- 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+ 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'uploader': 'Sergey M.',
+ 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'title': 'youtube-dl public playlist',
},
- 'playlist_count': 3,
+ 'playlist_count': 1,
}, {
- 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
'info_dict': {
- 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
- 'title': 'YDL_Empty_List',
+ 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'uploader': 'Sergey M.',
+ 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+ 'title': 'youtube-dl empty playlist',
},
'playlist_count': 0,
- 'skip': 'This playlist is private',
}, {
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'uploader': 'Christiaan008',
'uploader_id': 'ChRiStIaAn008',
},
- 'playlist_count': 95,
+ 'playlist_count': 96,
}, {
'note': 'issue #673',
'url': 'PLBB231211A4F62143',