]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vevo.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  19 class VevoBaseIE(InfoExtractor
): 
  20     def _extract_json(self
, webpage
, video_id
): 
  21         return self
._parse
_json
( 
  23                 r
'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', 
  24                 webpage
, 'initial store'), 
  28 class VevoIE(VevoBaseIE
): 
  30     Accepts urls from vevo.com or in the format 'vevo:{id}' 
  31     (currently used by MTVIE and MySpaceIE) 
  34         (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| 
  35            https?://cache\.vevo\.com/m/html/embed\.html\?video=| 
  36            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| 
  41         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', 
  42         'md5': '95ee28ee45e70130e3ab02b0f579ae23', 
  46             'title': 'Hurts - Somebody to Die For', 
  47             'timestamp': 1372057200, 
  48             'upload_date': '20130624', 
  50             'track': 'Somebody to Die For', 
  54         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  56         'note': 'v3 SMIL format', 
  57         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 
  58         'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 
  62             'title': 'Cassadee Pope - I Wish I Could Break Your Heart', 
  63             'timestamp': 1392796919, 
  64             'upload_date': '20140219', 
  65             'uploader': 'Cassadee Pope', 
  66             'track': 'I Wish I Could Break Your Heart', 
  67             'artist': 'Cassadee Pope', 
  70         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  72         'note': 'Age-limited video', 
  73         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 
  77             'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 
  79             'timestamp': 1372888800, 
  80             'upload_date': '20130703', 
  81             'uploader': 'Justin Timberlake', 
  82             'track': 'Tunnel Vision (Explicit)', 
  83             'artist': 'Justin Timberlake', 
  86         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  88         'note': 'No video_info', 
  89         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', 
  90         'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', 
  94             'title': 'K Camp ft. T.I. - Till I Die', 
  96             'timestamp': 1449468000, 
  97             'upload_date': '20151207', 
  99             'track': 'Till I Die', 
 103         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
 105         'note': 'Featured test', 
 106         'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', 
 107         'md5': 'd28675e5e8805035d949dc5cf161071d', 
 109             'id': 'USUV71402190', 
 111             'title': 'Lemaitre ft. LoLo - Wait', 
 113             'timestamp': 1413432000, 
 114             'upload_date': '20141016', 
 115             'uploader': 'Lemaitre', 
 117             'artist': 'Lemaitre', 
 118             'genre': 'Electronic', 
 120         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
 122         'note': 'Only available via webpage', 
 123         'url': 'http://www.vevo.com/watch/GBUV71600656', 
 124         'md5': '67e79210613865b66a47c33baa5e37fe', 
 126             'id': 'GBUV71600656', 
 128             'title': 'ABC - Viva Love', 
 130             'timestamp': 1461830400, 
 131             'upload_date': '20160428', 
 133             'track': 'Viva Love', 
 137         'expected_warnings': ['Failed to download video versions info'], 
 139         # no genres available 
 140         'url': 'http://www.vevo.com/watch/INS171400764', 
 141         'only_matching': True, 
 143         # Another case available only via the webpage; using streams/streamsV3 formats 
 144         # Geo-restricted to Netherlands/Germany 
 145         'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', 
 146         'only_matching': True, 
 149         0: 'youtube',  # only in AuthenticateVideo videoVersions 
 156     def _initialize_api(self
, video_id
): 
 157         webpage 
= self
._download
_webpage
( 
 158             'https://accounts.vevo.com/token', None, 
 159             note
='Retrieving oauth token', 
 160             errnote
='Unable to retrieve oauth token', 
 162                 'client_id': 'SPupX1tvqFEopQ1YS6SS', 
 163                 'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous', 
 166                 'Content-Type': 'application/json', 
 169         if re
.search(r
'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage
): 
 170             self
.raise_geo_restricted( 
 171                 '%s said: This page is currently unavailable in your region' % self
.IE_NAME
) 
 173         auth_info 
= self
._parse
_json
(webpage
, video_id
) 
 174         self
._api
_url
_template 
= self
.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info
['legacy_token'] 
 176     def _call_api(self
, path
, *args
, **kwargs
): 
 178             data 
= self
._download
_json
(self
._api
_url
_template 
% path
, *args
, **kwargs
) 
 179         except ExtractorError 
as e
: 
 180             if isinstance(e
.cause
, compat_HTTPError
): 
 181                 errors 
= self
._parse
_json
(e
.cause
.read().decode(), None)['errors'] 
 182                 error_message 
= ', '.join([error
['message'] for error 
in errors
]) 
 183                 raise ExtractorError('%s said: %s' % (self
.IE_NAME
, error_message
), expected
=True) 
 187     def _real_extract(self
, url
): 
 188         video_id 
= self
._match
_id
(url
) 
 190         self
._initialize
_api
(video_id
) 
 192         video_info 
= self
._call
_api
( 
 193             'video/%s' % video_id
, video_id
, 'Downloading api video info', 
 194             'Failed to download video info') 
 196         video_versions 
= self
._call
_api
( 
 197             'video/%s/streams' % video_id
, video_id
, 
 198             'Downloading video versions info', 
 199             'Failed to download video versions info', 
 202         # Some videos are only available via webpage (e.g. 
 203         # https://github.com/rg3/youtube-dl/issues/9366) 
 204         if not video_versions
: 
 205             webpage 
= self
._download
_webpage
(url
, video_id
) 
 206             json_data 
= self
._extract
_json
(webpage
, video_id
) 
 207             if 'streams' in json_data
.get('default', {}): 
 208                 video_versions 
= json_data
['default']['streams'][video_id
][0] 
 212                     for key
, value 
in json_data
['apollo']['data'].items() 
 213                     if key
.startswith('%s.streams' % video_id
)] 
 217         featured_artist 
= None 
 218         artists 
= video_info
.get('artists') 
 219         for curr_artist 
in artists
: 
 220             if curr_artist
.get('role') == 'Featured': 
 221                 featured_artist 
= curr_artist
['name'] 
 223                 artist 
= uploader 
= curr_artist
['name'] 
 226         for video_version 
in video_versions
: 
 227             version 
= self
._VERSIONS
.get(video_version
.get('version'), 'generic') 
 228             version_url 
= video_version
.get('url') 
 232             if '.ism' in version_url
: 
 234             elif '.mpd' in version_url
: 
 235                 formats
.extend(self
._extract
_mpd
_formats
( 
 236                     version_url
, video_id
, mpd_id
='dash-%s' % version
, 
 237                     note
='Downloading %s MPD information' % version
, 
 238                     errnote
='Failed to download %s MPD information' % version
, 
 240             elif '.m3u8' in version_url
: 
 241                 formats
.extend(self
._extract
_m
3u8_formats
( 
 242                     version_url
, video_id
, 'mp4', 'm3u8_native', 
 243                     m3u8_id
='hls-%s' % version
, 
 244                     note
='Downloading %s m3u8 information' % version
, 
 245                     errnote
='Failed to download %s m3u8 information' % version
, 
 248                 m 
= re
.search(r
'''(?xi) 
 249                     _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 250                     _(?P<vcodec>[a-z0-9]+) 
 252                     _(?P<acodec>[a-z0-9]+) 
 254                     \.(?P<ext>[a-z0-9]+)''', version_url
) 
 260                     'format_id': 'http-%s-%s' % (version
, video_version
['quality']), 
 261                     'vcodec': m
.group('vcodec'), 
 262                     'acodec': m
.group('acodec'), 
 263                     'vbr': int(m
.group('vbr')), 
 264                     'abr': int(m
.group('abr')), 
 265                     'ext': m
.group('ext'), 
 266                     'width': int(m
.group('width')), 
 267                     'height': int(m
.group('height')), 
 269         self
._sort
_formats
(formats
) 
 271         track 
= video_info
['title'] 
 273             artist 
= '%s ft. %s' % (artist
, featured_artist
) 
 274         title 
= '%s - %s' % (artist
, track
) if artist 
else track
 
 276         genres 
= video_info
.get('genres') 
 278             genres
[0] if genres 
and isinstance(genres
, list) and 
 279             isinstance(genres
[0], compat_str
) else None) 
 281         is_explicit 
= video_info
.get('isExplicit') 
 282         if is_explicit 
is True: 
 284         elif is_explicit 
is False: 
 293             'thumbnail': video_info
.get('imageUrl') or video_info
.get('thumbnailUrl'), 
 294             'timestamp': parse_iso8601(video_info
.get('releaseDate')), 
 295             'uploader': uploader
, 
 296             'duration': int_or_none(video_info
.get('duration')), 
 297             'view_count': int_or_none(video_info
.get('views', {}).get('total')), 
 298             'age_limit': age_limit
, 
 305 class VevoPlaylistIE(VevoBaseIE
): 
 306     _VALID_URL 
= r
'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' 
 309         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', 
 311             'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29', 
 312             'title': 'Best-Of: Birdman', 
 314         'playlist_count': 10, 
 316         'url': 'http://www.vevo.com/watch/genre/rock', 
 321         'playlist_count': 20, 
 323         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0', 
 324         'md5': '32dcdfddddf9ec6917fc88ca26d36282', 
 326             'id': 'USCMV1100073', 
 328             'title': 'Birdman - Y.U. MAD', 
 329             'timestamp': 1323417600, 
 330             'upload_date': '20111209', 
 331             'uploader': 'Birdman', 
 334             'genre': 'Rap/Hip-Hop', 
 336         'expected_warnings': ['Unable to download SMIL file'], 
 338         'url': 'http://www.vevo.com/watch/genre/rock?index=0', 
 339         'only_matching': True, 
 342     def _real_extract(self
, url
): 
 343         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 344         playlist_id 
= mobj
.group('id') 
 345         playlist_kind 
= mobj
.group('kind') 
 347         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 349         qs 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
) 
 350         index 
= qs
.get('index', [None])[0] 
 353             video_id 
= self
._search
_regex
( 
 354                 r
'<meta[^>]+content=(["\'])vevo
://video
/(?P
<id>.+?
)\
1[^
>]*>', 
 355                 webpage, 'video 
id', default=None, group='id') 
 357                 return self.url_result('vevo
:%s' % video_id, VevoIE.ie_key()) 
 359         playlists = self._extract_json(webpage, playlist_id)['default
']['%ss' % playlist_kind] 
 361         playlist = (list(playlists.values())[0] 
 362                     if playlist_kind == 'playlist
' else playlists[playlist_id]) 
 365             self.url_result('vevo
:%s' % src, VevoIE.ie_key()) 
 366             for src in playlist['isrcs
']] 
 368         return self.playlist_result( 
 369             entries, playlist.get('playlistId
') or playlist_id, 
 370             playlist.get('name
'), playlist.get('description
'))