]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vevo.py
d82261e5eec5f3c575bc48f23b23c64aa0355f83
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   7     compat_etree_fromstring
, 
  19 class VevoBaseIE(InfoExtractor
): 
  20     def _extract_json(self
, webpage
, video_id
, item
): 
  21         return self
._parse
_json
( 
  23                 r
'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', 
  24                 webpage
, 'initial store'), 
  25             video_id
)['default'][item
] 
  28 class VevoIE(VevoBaseIE
): 
  30     Accepts urls from vevo.com or in the format 'vevo:{id}' 
  31     (currently used by MTVIE and MySpaceIE) 
  34         (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| 
  35            https?://cache\.vevo\.com/m/html/embed\.html\?video=| 
  36            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| 
  41         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', 
  42         'md5': '95ee28ee45e70130e3ab02b0f579ae23', 
  46             'title': 'Hurts - Somebody to Die For', 
  47             'timestamp': 1372057200, 
  48             'upload_date': '20130624', 
  50             'track': 'Somebody to Die For', 
  54         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  56         'note': 'v3 SMIL format', 
  57         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 
  58         'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 
  62             'title': 'Cassadee Pope - I Wish I Could Break Your Heart', 
  63             'timestamp': 1392796919, 
  64             'upload_date': '20140219', 
  65             'uploader': 'Cassadee Pope', 
  66             'track': 'I Wish I Could Break Your Heart', 
  67             'artist': 'Cassadee Pope', 
  70         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  72         'note': 'Age-limited video', 
  73         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 
  77             'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 
  79             'timestamp': 1372888800, 
  80             'upload_date': '20130703', 
  81             'uploader': 'Justin Timberlake', 
  82             'track': 'Tunnel Vision (Explicit)', 
  83             'artist': 'Justin Timberlake', 
  86         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
  88         'note': 'No video_info', 
  89         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', 
  90         'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', 
  94             'title': 'K Camp ft. T.I. - Till I Die', 
  96             'timestamp': 1449468000, 
  97             'upload_date': '20151207', 
  99             'track': 'Till I Die', 
 103         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
 105         'note': 'Featured test', 
 106         'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', 
 107         'md5': 'd28675e5e8805035d949dc5cf161071d', 
 109             'id': 'USUV71402190', 
 111             'title': 'Lemaitre ft. LoLo - Wait', 
 113             'timestamp': 1413432000, 
 114             'upload_date': '20141016', 
 115             'uploader': 'Lemaitre', 
 117             'artist': 'Lemaitre', 
 118             'genre': 'Electronic', 
 120         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], 
 122         'note': 'Only available via webpage', 
 123         'url': 'http://www.vevo.com/watch/GBUV71600656', 
 124         'md5': '67e79210613865b66a47c33baa5e37fe', 
 126             'id': 'GBUV71600656', 
 128             'title': 'ABC - Viva Love', 
 130             'timestamp': 1461830400, 
 131             'upload_date': '20160428', 
 133             'track': 'Viva Love', 
 137         'expected_warnings': ['Failed to download video versions info'], 
 139         # no genres available 
 140         'url': 'http://www.vevo.com/watch/INS171400764', 
 141         'only_matching': True, 
 143     _SMIL_BASE_URL 
= 'http://smil.lvl3.vevo.com' 
 159         0: 'youtube',  # only in AuthenticateVideo videoVersions 
 166     def _parse_smil_formats(self
, smil
, smil_url
, video_id
, namespace
=None, f4m_params
=None, transform_rtmp_url
=None): 
 168         els 
= smil
.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') 
 170             src 
= el
.attrib
['src'] 
 171             m 
= re
.match(r
'''(?xi) 
 174                     [/a-z0-9]+     # The directory and main part of the URL 
 176                     _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 177                     _(?P<vcodec>[a-z0-9]+) 
 179                     _(?P<acodec>[a-z0-9]+) 
 181                     \.[a-z0-9]+  # File extension 
 186             format_url 
= self
._SMIL
_BASE
_URL 
+ m
.group('path') 
 189                 'format_id': 'smil_' + m
.group('tbr'), 
 190                 'vcodec': m
.group('vcodec'), 
 191                 'acodec': m
.group('acodec'), 
 192                 'tbr': int(m
.group('tbr')), 
 193                 'vbr': int(m
.group('vbr')), 
 194                 'abr': int(m
.group('abr')), 
 195                 'ext': m
.group('ext'), 
 196                 'width': int(m
.group('width')), 
 197                 'height': int(m
.group('height')), 
 201     def _initialize_api(self
, video_id
): 
 202         req 
= sanitized_Request( 
 203             'http://www.vevo.com/auth', data
=b
'') 
 204         webpage 
= self
._download
_webpage
( 
 206             note
='Retrieving oauth token', 
 207             errnote
='Unable to retrieve oauth token') 
 209         if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage
: 
 210             self
.raise_geo_restricted( 
 211                 '%s said: This page is currently unavailable in your region' % self
.IE_NAME
) 
 213         auth_info 
= self
._parse
_json
(webpage
, video_id
) 
 214         self
._api
_url
_template 
= self
.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info
['access_token'] 
 216     def _call_api(self
, path
, *args
, **kwargs
): 
 217         return self
._download
_json
(self
._api
_url
_template 
% path
, *args
, **kwargs
) 
 219     def _real_extract(self
, url
): 
 220         video_id 
= self
._match
_id
(url
) 
 222         json_url 
= 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
 
 223         response 
= self
._download
_json
( 
 224             json_url
, video_id
, 'Downloading video info', 
 225             'Unable to download info', fatal
=False) or {} 
 226         video_info 
= response
.get('video') or {} 
 228         featured_artist 
= None 
 235                 self
._initialize
_api
(video_id
) 
 236             except ExtractorError
: 
 237                 ytid 
= response
.get('errorInfo', {}).get('ytid') 
 240                         'Video is geoblocked, trying with the YouTube video %s' % ytid
) 
 241                     return self
.url_result(ytid
, 'Youtube', ytid
) 
 245             video_info 
= self
._call
_api
( 
 246                 'video/%s' % video_id
, video_id
, 'Downloading api video info', 
 247                 'Failed to download video info') 
 249             video_versions 
= self
._call
_api
( 
 250                 'video/%s/streams' % video_id
, video_id
, 
 251                 'Downloading video versions info', 
 252                 'Failed to download video versions info', 
 255             # Some videos are only available via webpage (e.g. 
 256             # https://github.com/rg3/youtube-dl/issues/9366) 
 257             if not video_versions
: 
 258                 webpage 
= self
._download
_webpage
(url
, video_id
) 
 259                 video_versions 
= self
._extract
_json
(webpage
, video_id
, 'streams')[video_id
][0] 
 261             timestamp 
= parse_iso8601(video_info
.get('releaseDate')) 
 262             artists 
= video_info
.get('artists') 
 263             for curr_artist 
in artists
: 
 264                 if curr_artist
.get('role') == 'Featured': 
 265                     featured_artist 
= curr_artist
['name'] 
 267                     artist 
= uploader 
= curr_artist
['name'] 
 268             view_count 
= int_or_none(video_info
.get('views', {}).get('total')) 
 270             for video_version 
in video_versions
: 
 271                 version 
= self
._VERSIONS
.get(video_version
['version']) 
 272                 version_url 
= video_version
.get('url') 
 276                 if '.ism' in version_url
: 
 278                 elif '.mpd' in version_url
: 
 279                     formats
.extend(self
._extract
_mpd
_formats
( 
 280                         version_url
, video_id
, mpd_id
='dash-%s' % version
, 
 281                         note
='Downloading %s MPD information' % version
, 
 282                         errnote
='Failed to download %s MPD information' % version
, 
 284                 elif '.m3u8' in version_url
: 
 285                     formats
.extend(self
._extract
_m
3u8_formats
( 
 286                         version_url
, video_id
, 'mp4', 'm3u8_native', 
 287                         m3u8_id
='hls-%s' % version
, 
 288                         note
='Downloading %s m3u8 information' % version
, 
 289                         errnote
='Failed to download %s m3u8 information' % version
, 
 292                     m 
= re
.search(r
'''(?xi) 
 293                         _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 294                         _(?P<vcodec>[a-z0-9]+) 
 296                         _(?P<acodec>[a-z0-9]+) 
 298                         \.(?P<ext>[a-z0-9]+)''', version_url
) 
 304                         'format_id': 'http-%s-%s' % (version
, video_version
['quality']), 
 305                         'vcodec': m
.group('vcodec'), 
 306                         'acodec': m
.group('acodec'), 
 307                         'vbr': int(m
.group('vbr')), 
 308                         'abr': int(m
.group('abr')), 
 309                         'ext': m
.group('ext'), 
 310                         'width': int(m
.group('width')), 
 311                         'height': int(m
.group('height')), 
 314             timestamp 
= int_or_none(self
._search
_regex
( 
 316                 video_info
['releaseDate'], 'release date', fatal
=False), 
 318             artists 
= video_info
.get('mainArtists') 
 320                 artist 
= uploader 
= artists
[0]['artistName'] 
 322             featured_artists 
= video_info
.get('featuredArtists') 
 324                 featured_artist 
= featured_artists
[0]['artistName'] 
 327             for video_version 
in video_info
['videoVersions']: 
 328                 version 
= self
._VERSIONS
.get(video_version
['version']) 
 329                 if version 
== 'youtube': 
 332                     source_type 
= self
._SOURCE
_TYPES
.get(video_version
['sourceType']) 
 333                     renditions 
= compat_etree_fromstring(video_version
['data']) 
 334                     if source_type 
== 'http': 
 335                         for rend 
in renditions
.findall('rendition'): 
 339                                 'format_id': 'http-%s-%s' % (version
, attr
['name']), 
 340                                 'height': int_or_none(attr
.get('frameheight')), 
 341                                 'width': int_or_none(attr
.get('frameWidth')), 
 342                                 'tbr': int_or_none(attr
.get('totalBitrate')), 
 343                                 'vbr': int_or_none(attr
.get('videoBitrate')), 
 344                                 'abr': int_or_none(attr
.get('audioBitrate')), 
 345                                 'vcodec': attr
.get('videoCodec'), 
 346                                 'acodec': attr
.get('audioCodec'), 
 348                     elif source_type 
== 'hls': 
 349                         formats
.extend(self
._extract
_m
3u8_formats
( 
 350                             renditions
.find('rendition').attrib
['url'], video_id
, 
 351                             'mp4', 'm3u8_native', m3u8_id
='hls-%s' % version
, 
 352                             note
='Downloading %s m3u8 information' % version
, 
 353                             errnote
='Failed to download %s m3u8 information' % version
, 
 355                     elif source_type 
== 'smil' and version 
== 'level3' and not smil_parsed
: 
 356                         formats
.extend(self
._extract
_smil
_formats
( 
 357                             renditions
.find('rendition').attrib
['url'], video_id
, False)) 
 359         self
._sort
_formats
(formats
) 
 361         track 
= video_info
['title'] 
 363             artist 
= '%s ft. %s' % (artist
, featured_artist
) 
 364         title 
= '%s - %s' % (artist
, track
) if artist 
else track
 
 366         genres 
= video_info
.get('genres') 
 368             genres
[0] if genres 
and isinstance(genres
, list) and 
 369             isinstance(genres
[0], compat_str
) else None) 
 371         is_explicit 
= video_info
.get('isExplicit') 
 372         if is_explicit 
is True: 
 374         elif is_explicit 
is False: 
 379         duration 
= video_info
.get('duration') 
 385             'thumbnail': video_info
.get('imageUrl') or video_info
.get('thumbnailUrl'), 
 386             'timestamp': timestamp
, 
 387             'uploader': uploader
, 
 388             'duration': duration
, 
 389             'view_count': view_count
, 
 390             'age_limit': age_limit
, 
 397 class VevoPlaylistIE(VevoBaseIE
): 
 398     _VALID_URL 
= r
'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' 
 401         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', 
 403             'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29', 
 404             'title': 'Best-Of: Birdman', 
 406         'playlist_count': 10, 
 408         'url': 'http://www.vevo.com/watch/genre/rock', 
 413         'playlist_count': 20, 
 415         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0', 
 416         'md5': '32dcdfddddf9ec6917fc88ca26d36282', 
 418             'id': 'USCMV1100073', 
 420             'title': 'Birdman - Y.U. MAD', 
 421             'timestamp': 1323417600, 
 422             'upload_date': '20111209', 
 423             'uploader': 'Birdman', 
 426             'genre': 'Rap/Hip-Hop', 
 428         'expected_warnings': ['Unable to download SMIL file'], 
 430         'url': 'http://www.vevo.com/watch/genre/rock?index=0', 
 431         'only_matching': True, 
 434     def _real_extract(self
, url
): 
 435         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 436         playlist_id 
= mobj
.group('id') 
 437         playlist_kind 
= mobj
.group('kind') 
 439         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 441         qs 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
) 
 442         index 
= qs
.get('index', [None])[0] 
 445             video_id 
= self
._search
_regex
( 
 446                 r
'<meta[^>]+content=(["\'])vevo
://video
/(?P
<id>.+?
)\
1[^
>]*>', 
 447                 webpage, 'video 
id', default=None, group='id') 
 449                 return self.url_result('vevo
:%s' % video_id, VevoIE.ie_key()) 
 451         playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind) 
 453         playlist = (list(playlists.values())[0] 
 454                     if playlist_kind == 'playlist
' else playlists[playlist_id]) 
 457             self.url_result('vevo
:%s' % src, VevoIE.ie_key()) 
 458             for src in playlist['isrcs
']] 
 460         return self.playlist_result( 
 461             entries, playlist.get('playlistId
') or playlist_id, 
 462             playlist.get('name
'), playlist.get('description
'))