]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vevo.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_etree_fromstring
 
  15 class VevoIE(InfoExtractor
): 
  17     Accepts urls from vevo.com or in the format 'vevo:{id}' 
  18     (currently used by MTVIE and MySpaceIE) 
  21         (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| 
  22            https?://cache\.vevo\.com/m/html/embed\.html\?video=| 
  23            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| 
  28         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', 
  29         'md5': '95ee28ee45e70130e3ab02b0f579ae23', 
  33             'title': 'Somebody to Die For', 
  34             'upload_date': '20130624', 
  36             'timestamp': 1372057200, 
  39         'note': 'v3 SMIL format', 
  40         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 
  41         'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 
  45             'title': 'I Wish I Could Break Your Heart', 
  46             'upload_date': '20140219', 
  47             'uploader': 'Cassadee Pope', 
  48             'timestamp': 1392796919, 
  51         'note': 'Age-limited video', 
  52         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 
  56             'title': 'Tunnel Vision (Explicit)', 
  57             'upload_date': '20130703', 
  59             'uploader': 'Justin Timberlake', 
  60             'timestamp': 1372888800, 
  63         'note': 'No video_info', 
  64         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', 
  65         'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', 
  69             'title': 'Till I Die', 
  70             'upload_date': '20151207', 
  73             'timestamp': 1449468000, 
  76     _SMIL_BASE_URL 
= 'http://smil.lvl3.vevo.com' 
  92         0: 'youtube',  # only in AuthenticateVideo videoVersions 
  99     def _parse_smil_formats(self
, smil
, smil_url
, video_id
, namespace
=None, f4m_params
=None, transform_rtmp_url
=None): 
 101         els 
= smil
.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') 
 103             src 
= el
.attrib
['src'] 
 104             m 
= re
.match(r
'''(?xi) 
 107                     [/a-z0-9]+     # The directory and main part of the URL 
 109                     _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 110                     _(?P<vcodec>[a-z0-9]+) 
 112                     _(?P<acodec>[a-z0-9]+) 
 114                     \.[a-z0-9]+  # File extension 
 119             format_url 
= self
._SMIL
_BASE
_URL 
+ m
.group('path') 
 122                 'format_id': 'smil_' + m
.group('tbr'), 
 123                 'vcodec': m
.group('vcodec'), 
 124                 'acodec': m
.group('acodec'), 
 125                 'tbr': int(m
.group('tbr')), 
 126                 'vbr': int(m
.group('vbr')), 
 127                 'abr': int(m
.group('abr')), 
 128                 'ext': m
.group('ext'), 
 129                 'width': int(m
.group('width')), 
 130                 'height': int(m
.group('height')), 
 134     def _initialize_api(self
, video_id
): 
 135         req 
= sanitized_Request( 
 136             'http://www.vevo.com/auth', data
=b
'') 
 137         webpage 
= self
._download
_webpage
( 
 139             note
='Retrieving oauth token', 
 140             errnote
='Unable to retrieve oauth token') 
 142         if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage
: 
 143             raise ExtractorError( 
 144                 '%s said: This page is currently unavailable in your region.' % self
.IE_NAME
, expected
=True) 
 146         auth_info 
= self
._parse
_json
(webpage
, video_id
) 
 147         self
._api
_url
_template 
= self
.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info
['access_token'] 
 149     def _call_api(self
, path
, video_id
, note
, errnote
, fatal
=True): 
 150         return self
._download
_json
(self
._api
_url
_template 
% path
, video_id
, note
, errnote
) 
 152     def _real_extract(self
, url
): 
 153         video_id 
= self
._match
_id
(url
) 
 155         json_url 
= 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
 
 156         response 
= self
._download
_json
( 
 157             json_url
, video_id
, 'Downloading video info', 'Unable to download info') 
 158         video_info 
= response
.get('video') or {} 
 159         video_versions 
= video_info
.get('videoVersions') 
 166             if response
.get('statusCode') != 909: 
 167                 ytid 
= response
.get('errorInfo', {}).get('ytid') 
 170                         'Video is geoblocked, trying with the YouTube video %s' % ytid
) 
 171                     return self
.url_result(ytid
, 'Youtube', ytid
) 
 173                 if 'statusMessage' in response
: 
 174                     raise ExtractorError('%s said: %s' % ( 
 175                         self
.IE_NAME
, response
['statusMessage']), expected
=True) 
 176                 raise ExtractorError('Unable to extract videos') 
 178             self
._initialize
_api
(video_id
) 
 179             video_info 
= self
._call
_api
( 
 180                 'video/%s' % video_id
, video_id
, 'Downloading api video info', 
 181                 'Failed to download video info') 
 183             video_versions 
= self
._call
_api
( 
 184                 'video/%s/streams' % video_id
, video_id
, 
 185                 'Downloading video versions info', 
 186                 'Failed to download video versions info') 
 188             timestamp 
= parse_iso8601(video_info
.get('releaseDate')) 
 189             artists 
= video_info
.get('artists') 
 191                 uploader 
= artists
[0]['name'] 
 192             view_count 
= int_or_none(video_info
.get('views', {}).get('total')) 
 194             for video_version 
in video_versions
: 
 195                 version 
= self
._VERSIONS
.get(video_version
['version']) 
 196                 version_url 
= video_version
.get('url') 
 200                 if '.ism' in version_url
: 
 202                 elif '.mpd' in version_url
: 
 203                     formats
.extend(self
._extract
_mpd
_formats
( 
 204                         version_url
, video_id
, mpd_id
='dash-%s' % version
, 
 205                         note
='Downloading %s MPD information' % version
, 
 206                         errnote
='Failed to download %s MPD information' % version
, 
 208                 elif '.m3u8' in version_url
: 
 209                     formats
.extend(self
._extract
_m
3u8_formats
( 
 210                         version_url
, video_id
, 'mp4', 'm3u8_native', 
 211                         m3u8_id
='hls-%s' % version
, 
 212                         note
='Downloading %s m3u8 information' % version
, 
 213                         errnote
='Failed to download %s m3u8 information' % version
, 
 216                     m 
= re
.search(r
'''(?xi) 
 217                         _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 218                         _(?P<vcodec>[a-z0-9]+) 
 220                         _(?P<acodec>[a-z0-9]+) 
 222                         \.(?P<ext>[a-z0-9]+)''', version_url
) 
 228                         'format_id': 'http-%s-%s' % (version
, video_version
['quality']), 
 229                         'vcodec': m
.group('vcodec'), 
 230                         'acodec': m
.group('acodec'), 
 231                         'vbr': int(m
.group('vbr')), 
 232                         'abr': int(m
.group('abr')), 
 233                         'ext': m
.group('ext'), 
 234                         'width': int(m
.group('width')), 
 235                         'height': int(m
.group('height')), 
 238             timestamp 
= int_or_none(self
._search
_regex
( 
 240                 video_info
['releaseDate'], 'release date', fatal
=False), 
 242             artists 
= video_info
.get('mainArtists') 
 244                 uploader 
= artists
[0]['artistName'] 
 247             for video_version 
in video_info
['videoVersions']: 
 248                 version 
= self
._VERSIONS
.get(video_version
['version']) 
 249                 if version 
== 'youtube': 
 252                     source_type 
= self
._SOURCE
_TYPES
.get(video_version
['sourceType']) 
 253                     renditions 
= compat_etree_fromstring(video_version
['data']) 
 254                     if source_type 
== 'http': 
 255                         for rend 
in renditions
.findall('rendition'): 
 259                                 'format_id': 'http-%s-%s' % (version
, attr
['name']), 
 260                                 'height': int_or_none(attr
.get('frameheight')), 
 261                                 'width': int_or_none(attr
.get('frameWidth')), 
 262                                 'tbr': int_or_none(attr
.get('totalBitrate')), 
 263                                 'vbr': int_or_none(attr
.get('videoBitrate')), 
 264                                 'abr': int_or_none(attr
.get('audioBitrate')), 
 265                                 'vcodec': attr
.get('videoCodec'), 
 266                                 'acodec': attr
.get('audioCodec'), 
 268                     elif source_type 
== 'hls': 
 269                         formats
.extend(self
._extract
_m
3u8_formats
( 
 270                             renditions
.find('rendition').attrib
['url'], video_id
, 
 271                             'mp4', 'm3u8_native', m3u8_id
='hls-%s' % version
, 
 272                             note
='Downloading %s m3u8 information' % version
, 
 273                             errnote
='Failed to download %s m3u8 information' % version
, 
 275                     elif source_type 
== 'smil' and version 
== 'level3' and not smil_parsed
: 
 276                         formats
.extend(self
._extract
_smil
_formats
( 
 277                             renditions
.find('rendition').attrib
['url'], video_id
, False)) 
 279         self
._sort
_formats
(formats
) 
 281         title 
= video_info
['title'] 
 283         is_explicit 
= video_info
.get('isExplicit') 
 284         if is_explicit 
is True: 
 286         elif is_explicit 
is False: 
 291         duration 
= video_info
.get('duration') 
 297             'thumbnail': video_info
.get('imageUrl') or video_info
.get('thumbnailUrl'), 
 298             'timestamp': timestamp
, 
 299             'uploader': uploader
, 
 300             'duration': duration
, 
 301             'view_count': view_count
, 
 302             'age_limit': age_limit
,