]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vevo.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   7     compat_etree_fromstring
, 
  19 class VevoBaseIE(InfoExtractor
): 
  20     def _extract_json(self
, webpage
, video_id
, item
): 
  21         return self
._parse
_json
( 
  23                 r
'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', 
  24                 webpage
, 'initial store'), 
  25             video_id
)['default'][item
] 
  28 class VevoIE(VevoBaseIE
): 
  30     Accepts urls from vevo.com or in the format 'vevo:{id}' 
  31     (currently used by MTVIE and MySpaceIE) 
  34         (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| 
  35            https?://cache\.vevo\.com/m/html/embed\.html\?video=| 
  36            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| 
  41         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', 
  42         'md5': '95ee28ee45e70130e3ab02b0f579ae23', 
  46             'title': 'Hurts - Somebody to Die For', 
  47             'timestamp': 1372057200, 
  48             'upload_date': '20130624', 
  50             'track': 'Somebody to Die For', 
  54         'expected_warnings': ['Unable to download SMIL file'], 
  56         'note': 'v3 SMIL format', 
  57         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 
  58         'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 
  62             'title': 'Cassadee Pope - I Wish I Could Break Your Heart', 
  63             'timestamp': 1392796919, 
  64             'upload_date': '20140219', 
  65             'uploader': 'Cassadee Pope', 
  66             'track': 'I Wish I Could Break Your Heart', 
  67             'artist': 'Cassadee Pope', 
  70         'expected_warnings': ['Unable to download SMIL file'], 
  72         'note': 'Age-limited video', 
  73         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 
  77             'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 
  79             'timestamp': 1372888800, 
  80             'upload_date': '20130703', 
  81             'uploader': 'Justin Timberlake', 
  82             'track': 'Tunnel Vision (Explicit)', 
  83             'artist': 'Justin Timberlake', 
  86         'expected_warnings': ['Unable to download SMIL file'], 
  88         'note': 'No video_info', 
  89         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', 
  90         'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', 
  94             'title': 'K Camp - Till I Die', 
  96             'timestamp': 1449468000, 
  97             'upload_date': '20151207', 
  99             'track': 'Till I Die', 
 101             'genre': 'Rap/Hip-Hop', 
 104         'note': 'Only available via webpage', 
 105         'url': 'http://www.vevo.com/watch/GBUV71600656', 
 106         'md5': '67e79210613865b66a47c33baa5e37fe', 
 108             'id': 'GBUV71600656', 
 110             'title': 'ABC - Viva Love', 
 112             'timestamp': 1461830400, 
 113             'upload_date': '20160428', 
 115             'track': 'Viva Love', 
 119         'expected_warnings': ['Failed to download video versions info'], 
 121         # no genres available 
 122         'url': 'http://www.vevo.com/watch/INS171400764', 
 123         'only_matching': True, 
 125     _SMIL_BASE_URL 
= 'http://smil.lvl3.vevo.com' 
 141         0: 'youtube',  # only in AuthenticateVideo videoVersions 
 148     def _parse_smil_formats(self
, smil
, smil_url
, video_id
, namespace
=None, f4m_params
=None, transform_rtmp_url
=None): 
 150         els 
= smil
.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') 
 152             src 
= el
.attrib
['src'] 
 153             m 
= re
.match(r
'''(?xi) 
 156                     [/a-z0-9]+     # The directory and main part of the URL 
 158                     _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 159                     _(?P<vcodec>[a-z0-9]+) 
 161                     _(?P<acodec>[a-z0-9]+) 
 163                     \.[a-z0-9]+  # File extension 
 168             format_url 
= self
._SMIL
_BASE
_URL 
+ m
.group('path') 
 171                 'format_id': 'smil_' + m
.group('tbr'), 
 172                 'vcodec': m
.group('vcodec'), 
 173                 'acodec': m
.group('acodec'), 
 174                 'tbr': int(m
.group('tbr')), 
 175                 'vbr': int(m
.group('vbr')), 
 176                 'abr': int(m
.group('abr')), 
 177                 'ext': m
.group('ext'), 
 178                 'width': int(m
.group('width')), 
 179                 'height': int(m
.group('height')), 
 183     def _initialize_api(self
, video_id
): 
 184         req 
= sanitized_Request( 
 185             'http://www.vevo.com/auth', data
=b
'') 
 186         webpage 
= self
._download
_webpage
( 
 188             note
='Retrieving oauth token', 
 189             errnote
='Unable to retrieve oauth token') 
 191         if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage
: 
 192             self
.raise_geo_restricted( 
 193                 '%s said: This page is currently unavailable in your region' % self
.IE_NAME
) 
 195         auth_info 
= self
._parse
_json
(webpage
, video_id
) 
 196         self
._api
_url
_template 
= self
.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info
['access_token'] 
 198     def _call_api(self
, path
, *args
, **kwargs
): 
 199         return self
._download
_json
(self
._api
_url
_template 
% path
, *args
, **kwargs
) 
 201     def _real_extract(self
, url
): 
 202         video_id 
= self
._match
_id
(url
) 
 204         json_url 
= 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
 
 205         response 
= self
._download
_json
( 
 206             json_url
, video_id
, 'Downloading video info', 
 207             'Unable to download info', fatal
=False) or {} 
 208         video_info 
= response
.get('video') or {} 
 210         featured_artist 
= None 
 217                 self
._initialize
_api
(video_id
) 
 218             except ExtractorError
: 
 219                 ytid 
= response
.get('errorInfo', {}).get('ytid') 
 222                         'Video is geoblocked, trying with the YouTube video %s' % ytid
) 
 223                     return self
.url_result(ytid
, 'Youtube', ytid
) 
 227             video_info 
= self
._call
_api
( 
 228                 'video/%s' % video_id
, video_id
, 'Downloading api video info', 
 229                 'Failed to download video info') 
 231             video_versions 
= self
._call
_api
( 
 232                 'video/%s/streams' % video_id
, video_id
, 
 233                 'Downloading video versions info', 
 234                 'Failed to download video versions info', 
 237             # Some videos are only available via webpage (e.g. 
 238             # https://github.com/rg3/youtube-dl/issues/9366) 
 239             if not video_versions
: 
 240                 webpage 
= self
._download
_webpage
(url
, video_id
) 
 241                 video_versions 
= self
._extract
_json
(webpage
, video_id
, 'streams')[video_id
][0] 
 243             timestamp 
= parse_iso8601(video_info
.get('releaseDate')) 
 244             artists 
= video_info
.get('artists') 
 246                 artist 
= uploader 
= artists
[0]['name'] 
 247             view_count 
= int_or_none(video_info
.get('views', {}).get('total')) 
 249             for video_version 
in video_versions
: 
 250                 version 
= self
._VERSIONS
.get(video_version
['version']) 
 251                 version_url 
= video_version
.get('url') 
 255                 if '.ism' in version_url
: 
 257                 elif '.mpd' in version_url
: 
 258                     formats
.extend(self
._extract
_mpd
_formats
( 
 259                         version_url
, video_id
, mpd_id
='dash-%s' % version
, 
 260                         note
='Downloading %s MPD information' % version
, 
 261                         errnote
='Failed to download %s MPD information' % version
, 
 263                 elif '.m3u8' in version_url
: 
 264                     formats
.extend(self
._extract
_m
3u8_formats
( 
 265                         version_url
, video_id
, 'mp4', 'm3u8_native', 
 266                         m3u8_id
='hls-%s' % version
, 
 267                         note
='Downloading %s m3u8 information' % version
, 
 268                         errnote
='Failed to download %s m3u8 information' % version
, 
 271                     m 
= re
.search(r
'''(?xi) 
 272                         _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 273                         _(?P<vcodec>[a-z0-9]+) 
 275                         _(?P<acodec>[a-z0-9]+) 
 277                         \.(?P<ext>[a-z0-9]+)''', version_url
) 
 283                         'format_id': 'http-%s-%s' % (version
, video_version
['quality']), 
 284                         'vcodec': m
.group('vcodec'), 
 285                         'acodec': m
.group('acodec'), 
 286                         'vbr': int(m
.group('vbr')), 
 287                         'abr': int(m
.group('abr')), 
 288                         'ext': m
.group('ext'), 
 289                         'width': int(m
.group('width')), 
 290                         'height': int(m
.group('height')), 
 293             timestamp 
= int_or_none(self
._search
_regex
( 
 295                 video_info
['releaseDate'], 'release date', fatal
=False), 
 297             artists 
= video_info
.get('mainArtists') 
 299                 artist 
= uploader 
= artists
[0]['artistName'] 
 301             featured_artists 
= video_info
.get('featuredArtists') 
 303                 featured_artist 
= featured_artists
[0]['artistName'] 
 306             for video_version 
in video_info
['videoVersions']: 
 307                 version 
= self
._VERSIONS
.get(video_version
['version']) 
 308                 if version 
== 'youtube': 
 311                     source_type 
= self
._SOURCE
_TYPES
.get(video_version
['sourceType']) 
 312                     renditions 
= compat_etree_fromstring(video_version
['data']) 
 313                     if source_type 
== 'http': 
 314                         for rend 
in renditions
.findall('rendition'): 
 318                                 'format_id': 'http-%s-%s' % (version
, attr
['name']), 
 319                                 'height': int_or_none(attr
.get('frameheight')), 
 320                                 'width': int_or_none(attr
.get('frameWidth')), 
 321                                 'tbr': int_or_none(attr
.get('totalBitrate')), 
 322                                 'vbr': int_or_none(attr
.get('videoBitrate')), 
 323                                 'abr': int_or_none(attr
.get('audioBitrate')), 
 324                                 'vcodec': attr
.get('videoCodec'), 
 325                                 'acodec': attr
.get('audioCodec'), 
 327                     elif source_type 
== 'hls': 
 328                         formats
.extend(self
._extract
_m
3u8_formats
( 
 329                             renditions
.find('rendition').attrib
['url'], video_id
, 
 330                             'mp4', 'm3u8_native', m3u8_id
='hls-%s' % version
, 
 331                             note
='Downloading %s m3u8 information' % version
, 
 332                             errnote
='Failed to download %s m3u8 information' % version
, 
 334                     elif source_type 
== 'smil' and version 
== 'level3' and not smil_parsed
: 
 335                         formats
.extend(self
._extract
_smil
_formats
( 
 336                             renditions
.find('rendition').attrib
['url'], video_id
, False)) 
 338         self
._sort
_formats
(formats
) 
 340         track 
= video_info
['title'] 
 342             artist 
= '%s ft. %s' % (artist
, featured_artist
) 
 343         title 
= '%s - %s' % (artist
, track
) if artist 
else track
 
 345         genres 
= video_info
.get('genres') 
 347             genres
[0] if genres 
and isinstance(genres
, list) and 
 348             isinstance(genres
[0], compat_str
) else None) 
 350         is_explicit 
= video_info
.get('isExplicit') 
 351         if is_explicit 
is True: 
 353         elif is_explicit 
is False: 
 358         duration 
= video_info
.get('duration') 
 364             'thumbnail': video_info
.get('imageUrl') or video_info
.get('thumbnailUrl'), 
 365             'timestamp': timestamp
, 
 366             'uploader': uploader
, 
 367             'duration': duration
, 
 368             'view_count': view_count
, 
 369             'age_limit': age_limit
, 
 376 class VevoPlaylistIE(VevoBaseIE
): 
 377     _VALID_URL 
= r
'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' 
 380         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', 
 382             'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29', 
 383             'title': 'Best-Of: Birdman', 
 385         'playlist_count': 10, 
 387         'url': 'http://www.vevo.com/watch/genre/rock', 
 392         'playlist_count': 20, 
 394         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0', 
 395         'md5': '32dcdfddddf9ec6917fc88ca26d36282', 
 397             'id': 'USCMV1100073', 
 399             'title': 'Birdman - Y.U. MAD', 
 400             'timestamp': 1323417600, 
 401             'upload_date': '20111209', 
 402             'uploader': 'Birdman', 
 405             'genre': 'Rap/Hip-Hop', 
 407         'expected_warnings': ['Unable to download SMIL file'], 
 409         'url': 'http://www.vevo.com/watch/genre/rock?index=0', 
 410         'only_matching': True, 
 413     def _real_extract(self
, url
): 
 414         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 415         playlist_id 
= mobj
.group('id') 
 416         playlist_kind 
= mobj
.group('kind') 
 418         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 420         qs 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
) 
 421         index 
= qs
.get('index', [None])[0] 
 424             video_id 
= self
._search
_regex
( 
 425                 r
'<meta[^>]+content=(["\'])vevo
://video
/(?P
<id>.+?
)\
1[^
>]*>', 
 426                 webpage, 'video 
id', default=None, group='id') 
 428                 return self.url_result('vevo
:%s' % video_id, VevoIE.ie_key()) 
 430         playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind) 
 432         playlist = (list(playlists.values())[0] 
 433                     if playlist_kind == 'playlist
' else playlists[playlist_id]) 
 436             self.url_result('vevo
:%s' % src, VevoIE.ie_key()) 
 437             for src in playlist['isrcs
']] 
 439         return self.playlist_result( 
 440             entries, playlist.get('playlistId
') or playlist_id, 
 441             playlist.get('name
'), playlist.get('description
'))