]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vevo.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  19 class VevoBaseIE(InfoExtractor
): 
  20     def _extract_json(self
, webpage
, video_id
): 
  21         return self
._parse
_json
( 
  23                 r
'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', 
  24                 webpage
, 'initial store'), 
  28 class VevoIE(VevoBaseIE
): 
  30     Accepts urls from vevo.com or in the format 'vevo:{id}' 
  31     (currently used by MTVIE and MySpaceIE) 
  34         (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| 
  35            https?://cache\.vevo\.com/m/html/embed\.html\?video=| 
  36            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| 
  37            https?://embed\.vevo\.com/.*?[?&]isrc=| 
  42         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', 
  43         'md5': '95ee28ee45e70130e3ab02b0f579ae23', 
  47             'title': 'Hurts - Somebody to Die For', 
  48             'timestamp': 1372057200, 
  49             'upload_date': '20130624', 
  51             'track': 'Somebody to Die For', 
  55         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  57         'note': 'v3 SMIL format', 
  58         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 
  59         'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 
  63             'title': 'Cassadee Pope - I Wish I Could Break Your Heart', 
  64             'timestamp': 1392796919, 
  65             'upload_date': '20140219', 
  66             'uploader': 'Cassadee Pope', 
  67             'track': 'I Wish I Could Break Your Heart', 
  68             'artist': 'Cassadee Pope', 
  71         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  73         'note': 'Age-limited video', 
  74         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 
  78             'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 
  80             'timestamp': 1372888800, 
  81             'upload_date': '20130703', 
  82             'uploader': 'Justin Timberlake', 
  83             'track': 'Tunnel Vision (Explicit)', 
  84             'artist': 'Justin Timberlake', 
  87         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  89         'note': 'No video_info', 
  90         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', 
  91         'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', 
  95             'title': 'K Camp ft. T.I. - Till I Die', 
  97             'timestamp': 1449468000, 
  98             'upload_date': '20151207', 
 100             'track': 'Till I Die', 
 104         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
 106         'note': 'Featured test', 
 107         'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', 
 108         'md5': 'd28675e5e8805035d949dc5cf161071d', 
 110             'id': 'USUV71402190', 
 112             'title': 'Lemaitre ft. LoLo - Wait', 
 114             'timestamp': 1413432000, 
 115             'upload_date': '20141016', 
 116             'uploader': 'Lemaitre', 
 118             'artist': 'Lemaitre', 
 119             'genre': 'Electronic', 
 121         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
 123         'note': 'Only available via webpage', 
 124         'url': 'http://www.vevo.com/watch/GBUV71600656', 
 125         'md5': '67e79210613865b66a47c33baa5e37fe', 
 127             'id': 'GBUV71600656', 
 129             'title': 'ABC - Viva Love', 
 131             'timestamp': 1461830400, 
 132             'upload_date': '20160428', 
 134             'track': 'Viva Love', 
 138         'expected_warnings': ['Failed to download video versions info'], 
 140         # no genres available 
 141         'url': 'http://www.vevo.com/watch/INS171400764', 
 142         'only_matching': True, 
 144         # Another case available only via the webpage; using streams/streamsV3 formats 
 145         # Geo-restricted to Netherlands/Germany 
 146         'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', 
 147         'only_matching': True, 
 149         'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=', 
 150         'only_matching': True, 
 153         0: 'youtube',  # only in AuthenticateVideo videoVersions 
 160     def _initialize_api(self
, video_id
): 
 161         webpage 
= self
._download
_webpage
( 
 162             'https://accounts.vevo.com/token', None, 
 163             note
='Retrieving oauth token', 
 164             errnote
='Unable to retrieve oauth token', 
 166                 'client_id': 'SPupX1tvqFEopQ1YS6SS', 
 167                 'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous', 
 170                 'Content-Type': 'application/json', 
 173         if re
.search(r
'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage
): 
 174             self
.raise_geo_restricted( 
 175                 '%s said: This page is currently unavailable in your region' % self
.IE_NAME
) 
 177         auth_info 
= self
._parse
_json
(webpage
, video_id
) 
 178         self
._api
_url
_template 
= self
.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info
['legacy_token'] 
 180     def _call_api(self
, path
, *args
, **kwargs
): 
 182             data 
= self
._download
_json
(self
._api
_url
_template 
% path
, *args
, **kwargs
) 
 183         except ExtractorError 
as e
: 
 184             if isinstance(e
.cause
, compat_HTTPError
): 
 185                 errors 
= self
._parse
_json
(e
.cause
.read().decode(), None)['errors'] 
 186                 error_message 
= ', '.join([error
['message'] for error 
in errors
]) 
 187                 raise ExtractorError('%s said: %s' % (self
.IE_NAME
, error_message
), expected
=True) 
 191     def _real_extract(self
, url
): 
 192         video_id 
= self
._match
_id
(url
) 
 194         self
._initialize
_api
(video_id
) 
 196         video_info 
= self
._call
_api
( 
 197             'video/%s' % video_id
, video_id
, 'Downloading api video info', 
 198             'Failed to download video info') 
 200         video_versions 
= self
._call
_api
( 
 201             'video/%s/streams' % video_id
, video_id
, 
 202             'Downloading video versions info', 
 203             'Failed to download video versions info', 
 206         # Some videos are only available via webpage (e.g. 
 207         # https://github.com/ytdl-org/youtube-dl/issues/9366) 
 208         if not video_versions
: 
 209             webpage 
= self
._download
_webpage
(url
, video_id
) 
 210             json_data 
= self
._extract
_json
(webpage
, video_id
) 
 211             if 'streams' in json_data
.get('default', {}): 
 212                 video_versions 
= json_data
['default']['streams'][video_id
][0] 
 216                     for key
, value 
in json_data
['apollo']['data'].items() 
 217                     if key
.startswith('%s.streams' % video_id
)] 
 221         featured_artist 
= None 
 222         artists 
= video_info
.get('artists') 
 223         for curr_artist 
in artists
: 
 224             if curr_artist
.get('role') == 'Featured': 
 225                 featured_artist 
= curr_artist
['name'] 
 227                 artist 
= uploader 
= curr_artist
['name'] 
 230         for video_version 
in video_versions
: 
 231             version 
= self
._VERSIONS
.get(video_version
.get('version'), 'generic') 
 232             version_url 
= video_version
.get('url') 
 236             if '.ism' in version_url
: 
 238             elif '.mpd' in version_url
: 
 239                 formats
.extend(self
._extract
_mpd
_formats
( 
 240                     version_url
, video_id
, mpd_id
='dash-%s' % version
, 
 241                     note
='Downloading %s MPD information' % version
, 
 242                     errnote
='Failed to download %s MPD information' % version
, 
 244             elif '.m3u8' in version_url
: 
 245                 formats
.extend(self
._extract
_m
3u8_formats
( 
 246                     version_url
, video_id
, 'mp4', 'm3u8_native', 
 247                     m3u8_id
='hls-%s' % version
, 
 248                     note
='Downloading %s m3u8 information' % version
, 
 249                     errnote
='Failed to download %s m3u8 information' % version
, 
 252                 m 
= re
.search(r
'''(?xi) 
 253                     _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 254                     _(?P<vcodec>[a-z0-9]+) 
 256                     _(?P<acodec>[a-z0-9]+) 
 258                     \.(?P<ext>[a-z0-9]+)''', version_url
) 
 264                     'format_id': 'http-%s-%s' % (version
, video_version
['quality']), 
 265                     'vcodec': m
.group('vcodec'), 
 266                     'acodec': m
.group('acodec'), 
 267                     'vbr': int(m
.group('vbr')), 
 268                     'abr': int(m
.group('abr')), 
 269                     'ext': m
.group('ext'), 
 270                     'width': int(m
.group('width')), 
 271                     'height': int(m
.group('height')), 
 273         self
._sort
_formats
(formats
) 
 275         track 
= video_info
['title'] 
 277             artist 
= '%s ft. %s' % (artist
, featured_artist
) 
 278         title 
= '%s - %s' % (artist
, track
) if artist 
else track
 
 280         genres 
= video_info
.get('genres') 
 282             genres
[0] if genres 
and isinstance(genres
, list) 
 283             and isinstance(genres
[0], compat_str
) else None) 
 285         is_explicit 
= video_info
.get('isExplicit') 
 286         if is_explicit 
is True: 
 288         elif is_explicit 
is False: 
 297             'thumbnail': video_info
.get('imageUrl') or video_info
.get('thumbnailUrl'), 
 298             'timestamp': parse_iso8601(video_info
.get('releaseDate')), 
 299             'uploader': uploader
, 
 300             'duration': int_or_none(video_info
.get('duration')), 
 301             'view_count': int_or_none(video_info
.get('views', {}).get('total')), 
 302             'age_limit': age_limit
, 
 309 class VevoPlaylistIE(VevoBaseIE
): 
 310     _VALID_URL 
= r
'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' 
 313         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', 
 315             'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29', 
 316             'title': 'Best-Of: Birdman', 
 318         'playlist_count': 10, 
 320         'url': 'http://www.vevo.com/watch/genre/rock', 
 325         'playlist_count': 20, 
 327         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0', 
 328         'md5': '32dcdfddddf9ec6917fc88ca26d36282', 
 330             'id': 'USCMV1100073', 
 332             'title': 'Birdman - Y.U. MAD', 
 333             'timestamp': 1323417600, 
 334             'upload_date': '20111209', 
 335             'uploader': 'Birdman', 
 338             'genre': 'Rap/Hip-Hop', 
 340         'expected_warnings': ['Unable to download SMIL file'], 
 342         'url': 'http://www.vevo.com/watch/genre/rock?index=0', 
 343         'only_matching': True, 
 346     def _real_extract(self
, url
): 
 347         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 348         playlist_id 
= mobj
.group('id') 
 349         playlist_kind 
= mobj
.group('kind') 
 351         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 353         qs 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
) 
 354         index 
= qs
.get('index', [None])[0] 
 357             video_id 
= self
._search
_regex
( 
 358                 r
'<meta[^>]+content=(["\'])vevo
://video
/(?P
<id>.+?
)\
1[^
>]*>', 
 359                 webpage, 'video 
id', default=None, group='id') 
 361                 return self.url_result('vevo
:%s' % video_id, VevoIE.ie_key()) 
 363         playlists = self._extract_json(webpage, playlist_id)['default
']['%ss' % playlist_kind] 
 365         playlist = (list(playlists.values())[0] 
 366                     if playlist_kind == 'playlist
' else playlists[playlist_id]) 
 369             self.url_result('vevo
:%s' % src, VevoIE.ie_key()) 
 370             for src in playlist['isrcs
']] 
 372         return self.playlist_result( 
 373             entries, playlist.get('playlistId
') or playlist_id, 
 374             playlist.get('name
'), playlist.get('description
'))