]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vevo.py
3f6020f74ec9eeefbddafc184d3f48cf5e436adb
   3 import xml
.etree
.ElementTree
 
   6 from .common 
import InfoExtractor
 
  13 class VevoIE(InfoExtractor
): 
  15     Accepts urls from vevo.com or in the format 'vevo:{id}' 
  16     (currently used by MTVIE) 
  18     _VALID_URL 
= r
'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)' 
  20         u
'url': u
'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', 
  21         u
'file': u
'GB1101300280.mp4', 
  22         u
"md5": u
"06bea460acb744eab74a9d7dcb4bfd61", 
  24             u
"upload_date": u
"20130624", 
  25             u
"uploader": u
"Hurts", 
  26             u
"title": u
"Somebody to Die For", 
  32     _SMIL_BASE_URL 
= 'http://smil.lvl3.vevo.com/' 
  34     def _formats_from_json(self
, video_info
): 
  35         last_version 
= {'version': -1} 
  36         for version 
in video_info
['videoVersions']: 
  37             # These are the HTTP downloads, other types are for different manifests 
  38             if version
['sourceType'] == 2: 
  39                 if version
['version'] > last_version
['version']: 
  40                     last_version 
= version
 
  41         if last_version
['version'] == -1: 
  42             raise ExtractorError(u
'Unable to extract last version of the video') 
  44         renditions 
= xml
.etree
.ElementTree
.fromstring(last_version
['data']) 
  46         # Already sorted from worst to best quality 
  47         for rend 
in renditions
.findall('rendition'): 
  49             format_note 
= '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
 
  52                 'format_id': attr
['name'], 
  53                 'format_note': format_note
, 
  54                 'height': int(attr
['frameheight']), 
  55                 'width': int(attr
['frameWidth']), 
  59     def _formats_from_smil(self
, smil_xml
): 
  61         smil_doc 
= xml
.etree
.ElementTree
.fromstring(smil_xml
.encode('utf-8')) 
  62         els 
= smil_doc
.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') 
  64             src 
= el
.attrib
['src'] 
  65             m 
= re
.match(r
'''(?xi) 
  68                     [/a-z0-9]+     # The directory and main part of the URL 
  70                     _(?P<width>[0-9]+)x(?P<height>[0-9]+) 
  71                     _(?P<vcodec>[a-z0-9]+) 
  73                     _(?P<acodec>[a-z0-9]+) 
  75                     \.[a-z0-9]+  # File extension 
  80             format_url 
= self
._SMIL
_BASE
_URL 
+ m
.group('path') 
  81             format_note 
= ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' % 
  85                 'format_id': u
'SMIL_' + m
.group('cbr'), 
  86                 'format_note': format_note
, 
  87                 'ext': m
.group('ext'), 
  88                 'width': int(m
.group('width')), 
  89                 'height': int(m
.group('height')), 
  93     def _real_extract(self
, url
): 
  94         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  95         video_id 
= mobj
.group('id') 
  97         json_url 
= 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
 
  98         info_json 
= self
._download
_webpage
(json_url
, video_id
, u
'Downloading json info') 
  99         video_info 
= json
.loads(info_json
)['video'] 
 101         formats 
= self
._formats
_from
_json
(video_info
) 
 103             smil_url 
= '%s/Video/V2/VFILE/%s/%sr.smil' % ( 
 104                 self
._SMIL
_BASE
_URL
, video_id
, video_id
.lower()) 
 105             smil_xml 
= self
._download
_webpage
(smil_url
, video_id
, 
 106                                               u
'Downloading SMIL info') 
 107             formats
.extend(self
._formats
_from
_smil
(smil_xml
)) 
 108         except ExtractorError 
as ee
: 
 109             if not isinstance(ee
.cause
, compat_HTTPError
): 
 111             self
._downloader
.report_warning( 
 112                 u
'Cannot download SMIL information, falling back to JSON ..') 
 114         timestamp_ms 
= int(self
._search
_regex
( 
 115             r
'/Date\((\d+)\)/', video_info
['launchDate'], u
'launch date')) 
 116         upload_date 
= datetime
.datetime
.fromtimestamp(timestamp_ms 
// 1000) 
 119             'title': video_info
['title'], 
 121             'thumbnail': video_info
['imageUrl'], 
 122             'upload_date': upload_date
.strftime('%Y%m%d'), 
 123             'uploader': video_info
['mainArtists'][0]['artistName'], 
 124             'duration': video_info
['duration'],