]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vevo.py
c17094f8193f7678cc3d0a912c3d970f38e6bf7c
   1 from __future__ 
import unicode_literals
 
   4 import xml
.etree
.ElementTree
 
   6 from .common 
import InfoExtractor
 
  16 class VevoIE(InfoExtractor
): 
  18     Accepts urls from vevo.com or in the format 'vevo:{id}' 
  19     (currently used by MTVIE and MySpaceIE) 
  22         (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| 
  23            https?://cache\.vevo\.com/m/html/embed\.html\?video=| 
  24            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| 
  29         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', 
  30         "md5": "95ee28ee45e70130e3ab02b0f579ae23", 
  34             "upload_date": "20130624", 
  36             "title": "Somebody to Die For", 
  40             # timestamp and upload_date are often incorrect; seem to change randomly 
  44         'note': 'v3 SMIL format', 
  45         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 
  46         'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 
  50             'upload_date': '20140219', 
  51             'uploader': 'Cassadee Pope', 
  52             'title': 'I Wish I Could Break Your Heart', 
  58         'note': 'Age-limited video', 
  59         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 
  64             'title': 'Tunnel Vision (Explicit)', 
  65             'uploader': 'Justin Timberlake', 
  66             'upload_date': 're:2013070[34]', 
  70             'skip_download': 'true', 
  73     _SMIL_BASE_URL 
= 'http://smil.lvl3.vevo.com/' 
  75     def _real_initialize(self
): 
  76         req 
= compat_urllib_request
.Request( 
  77             'http://www.vevo.com/auth', data
=b
'') 
  78         webpage 
= self
._download
_webpage
( 
  80             note
='Retrieving oauth token', 
  81             errnote
='Unable to retrieve oauth token', 
  84             self
._oauth
_token 
= None 
  86             self
._oauth
_token 
= self
._search
_regex
( 
  87                 r
'access_token":\s*"([^"]+)"', 
  88                 webpage
, 'access token', fatal
=False) 
  90     def _formats_from_json(self
, video_info
): 
  91         last_version 
= {'version': -1} 
  92         for version 
in video_info
['videoVersions']: 
  93             # These are the HTTP downloads, other types are for different manifests 
  94             if version
['sourceType'] == 2: 
  95                 if version
['version'] > last_version
['version']: 
  96                     last_version 
= version
 
  97         if last_version
['version'] == -1: 
  98             raise ExtractorError('Unable to extract last version of the video') 
 100         renditions 
= xml
.etree
.ElementTree
.fromstring(last_version
['data']) 
 102         # Already sorted from worst to best quality 
 103         for rend 
in renditions
.findall('rendition'): 
 105             format_note 
= '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
 
 108                 'format_id': attr
['name'], 
 109                 'format_note': format_note
, 
 110                 'height': int(attr
['frameheight']), 
 111                 'width': int(attr
['frameWidth']), 
 115     def _formats_from_smil(self
, smil_xml
): 
 117         smil_doc 
= xml
.etree
.ElementTree
.fromstring(smil_xml
.encode('utf-8')) 
 118         els 
= smil_doc
.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') 
 120             src 
= el
.attrib
['src'] 
 121             m 
= re
.match(r
'''(?xi) 
 124                     [/a-z0-9]+     # The directory and main part of the URL 
 126                     _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
 127                     _(?P<vcodec>[a-z0-9]+) 
 129                     _(?P<acodec>[a-z0-9]+) 
 131                     \.[a-z0-9]+  # File extension 
 136             format_url 
= self
._SMIL
_BASE
_URL 
+ m
.group('path') 
 139                 'format_id': 'SMIL_' + m
.group('cbr'), 
 140                 'vcodec': m
.group('vcodec'), 
 141                 'acodec': m
.group('acodec'), 
 142                 'vbr': int(m
.group('vbr')), 
 143                 'abr': int(m
.group('abr')), 
 144                 'ext': m
.group('ext'), 
 145                 'width': int(m
.group('width')), 
 146                 'height': int(m
.group('height')), 
 150     def _download_api_formats(self
, video_id
): 
 151         if not self
._oauth
_token
: 
 152             self
._downloader
.report_warning( 
 153                 'No oauth token available, skipping API HLS download') 
 156         api_url 
= 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( 
 157             video_id
, self
._oauth
_token
) 
 158         api_data 
= self
._download
_json
( 
 160             note
='Downloading HLS formats', 
 161             errnote
='Failed to download HLS format list', fatal
=False) 
 165         m3u8_url 
= api_data
[0]['url'] 
 166         return self
._extract
_m
3u8_formats
( 
 167             m3u8_url
, video_id
, entry_protocol
='m3u8_native', ext
='mp4', 
 170     def _real_extract(self
, url
): 
 171         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 172         video_id 
= mobj
.group('id') 
 174         json_url 
= 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
 
 175         response 
= self
._download
_json
(json_url
, video_id
) 
 176         video_info 
= response
['video'] 
 179             if 'statusMessage' in response
: 
 180                 raise ExtractorError('%s said: %s' % (self
.IE_NAME
, response
['statusMessage']), expected
=True) 
 181             raise ExtractorError('Unable to extract videos') 
 183         formats 
= self
._formats
_from
_json
(video_info
) 
 185         is_explicit 
= video_info
.get('isExplicit') 
 186         if is_explicit 
is True: 
 188         elif is_explicit 
is False: 
 193         # Download via HLS API 
 194         formats
.extend(self
._download
_api
_formats
(video_id
)) 
 197         smil_blocks 
= sorted(( 
 198             f 
for f 
in video_info
['videoVersions'] 
 199             if f
['sourceType'] == 13), 
 200             key
=lambda f
: f
['version']) 
 201         smil_url 
= '%s/Video/V2/VFILE/%s/%sr.smil' % ( 
 202             self
._SMIL
_BASE
_URL
, video_id
, video_id
.lower()) 
 204             smil_url_m 
= self
._search
_regex
( 
 205                 r
'url="([^"]+)"', smil_blocks
[-1]['data'], 'SMIL URL', 
 207             if smil_url_m 
is not None: 
 208                 smil_url 
= smil_url_m
 
 210             smil_xml 
= self
._download
_webpage
( 
 211                 smil_url
, video_id
, 'Downloading SMIL info', fatal
=False) 
 213                 formats
.extend(self
._formats
_from
_smil
(smil_xml
)) 
 215         self
._sort
_formats
(formats
) 
 216         timestamp_ms 
= int_or_none(self
._search
_regex
( 
 218             video_info
['launchDate'], 'launch date', fatal
=False)) 
 222             'title': video_info
['title'], 
 224             'thumbnail': video_info
['imageUrl'], 
 225             'timestamp': timestamp_ms 
// 1000, 
 226             'uploader': video_info
['mainArtists'][0]['artistName'], 
 227             'duration': video_info
['duration'], 
 228             'age_limit': age_limit
,