]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vevo.py
4378b17800f1df78275d68a9525ca95585dc8b9d
3 import xml
.etree
.ElementTree
6 from .common
import InfoExtractor
13 class VevoIE(InfoExtractor
):
15 Accepts urls from vevo.com or in the format 'vevo:{id}'
16 (currently used by MTVIE)
18 _VALID_URL
= r
'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
20 u
'url': u
'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
21 u
'file': u
'GB1101300280.mp4',
22 u
"md5": u
"06bea460acb744eab74a9d7dcb4bfd61",
24 u
"upload_date": u
"20130624",
25 u
"uploader": u
"Hurts",
26 u
"title": u
"Somebody to Die For",
32 _SMIL_BASE_URL
= 'http://smil.lvl3.vevo.com/'
def _formats_from_json(self, video_info):
    """Build the list of HTTP download formats from the video info JSON.

    Scans ``video_info['videoVersions']`` for entries whose ``sourceType``
    is 2, keeps the one with the highest ``version`` number, parses its
    ``data`` field as XML and turns every ``rendition`` element into a
    format dictionary.

    Returns a list of dictionaries with the keys ``url``, ``format_id``,
    ``format_note``, ``height`` and ``width``, in document order.

    Raises ExtractorError when no usable entry is found, i.e. the
    ``version`` of the selected entry is still the -1 sentinel.

    NOTE(review): this block was restored from a damaged listing in which
    viewer line numbers were fused into the code and some statements were
    dropped. The ``'url'`` entry is not visible in that listing; it is
    restored on the assumption that each rendition carries a ``url``
    attribute -- confirm against the upstream file.
    """
    # Sentinel: any real version number is expected to be greater than -1.
    last_version = {'version': -1}
    for version in video_info['videoVersions']:
        # These are the HTTP downloads, other types are for different manifests
        if version['sourceType'] != 2:
            continue
        # Strict comparison: on a tie the first entry seen is kept.
        if version['version'] > last_version['version']:
            last_version = version
    if last_version['version'] == -1:
        raise ExtractorError(u'Unable to extract last version of the video')

    renditions = xml.etree.ElementTree.fromstring(last_version['data'])
    formats = []
    # Already sorted from worst to best quality
    for rend in renditions.findall('rendition'):
        attr = rend.attrib
        format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
        formats.append({
            'url': attr['url'],
            'format_id': attr['name'],
            'format_note': format_note,
            # The attribute names differ in case ('frameheight' vs
            # 'frameWidth'); both spellings are kept exactly as listed.
            'height': int(attr['frameheight']),
            'width': int(attr['frameWidth']),
        })
    return formats
def _formats_from_smil(self, smil_xml):
    """Build the list of formats described by a SMIL document.

    ``smil_xml`` is the SMIL document as text. Every ``video`` element in
    the SMIL 2.0 Language namespace is inspected; its ``src`` attribute is
    matched against a pattern that encodes bitrates, frame size and codecs
    in the file name. Sources that do not match are skipped.

    Returns a list of dictionaries with the keys ``url``, ``format_id``,
    ``vcodec``, ``acodec``, ``vbr``, ``abr``, ``ext``, ``width`` and
    ``height``.

    NOTE(review): this block was restored from a damaged listing in which
    viewer line numbers were fused into the code and some statements were
    dropped. The definitions of the ``ext``, ``path``, ``cbr``, ``vbr``
    and ``abr`` groups, the ``'url'`` entry and the skip on a failed match
    are not visible in that listing. They are inferred from the
    ``m.group(...)`` calls and the position of the gaps -- confirm against
    the upstream file.
    """
    formats = []
    smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
    els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
    for el in els:
        src = el.attrib['src']
        m = re.match(r'''(?xi)
            (?P<ext>[a-z0-9]+):
            (?P<path>
                [/a-z0-9]+     # The directory and main part of the URL
                _(?P<cbr>[0-9]+)k
                _(?P<width>[0-9]+)x(?P<height>[0-9]+)
                _(?P<vcodec>[a-z0-9]+)
                _(?P<vbr>[0-9]+)
                _(?P<acodec>[a-z0-9]+)
                _(?P<abr>[0-9]+)
                \.[a-z0-9]+  # File extension
            )''', src)
        if not m:
            # Without this guard m.group() would fail with AttributeError
            # on a source that does not follow the naming scheme.
            continue

        format_url = self._SMIL_BASE_URL + m.group('path')
        formats.append({
            'url': format_url,
            'format_id': u'SMIL_' + m.group('cbr'),
            'vcodec': m.group('vcodec'),
            'acodec': m.group('acodec'),
            'vbr': int(m.group('vbr')),
            'abr': int(m.group('abr')),
            'ext': m.group('ext'),
            'width': int(m.group('width')),
            'height': int(m.group('height')),
        })
    return formats
94 def _real_extract(self
, url
):
95 mobj
= re
.match(self
._VALID
_URL
, url
)
96 video_id
= mobj
.group('id')
98 json_url
= 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
99 info_json
= self
._download
_webpage
(json_url
, video_id
, u
'Downloading json info')
100 video_info
= json
.loads(info_json
)['video']
102 formats
= self
._formats
_from
_json
(video_info
)
104 smil_url
= '%s/Video/V2/VFILE/%s/%sr.smil' % (
105 self
._SMIL
_BASE
_URL
, video_id
, video_id
.lower())
106 smil_xml
= self
._download
_webpage
(smil_url
, video_id
,
107 u
'Downloading SMIL info')
108 formats
.extend(self
._formats
_from
_smil
(smil_xml
))
109 except ExtractorError
as ee
:
110 if not isinstance(ee
.cause
, compat_HTTPError
):
112 self
._downloader
.report_warning(
113 u
'Cannot download SMIL information, falling back to JSON ..')
115 timestamp_ms
= int(self
._search
_regex
(
116 r
'/Date\((\d+)\)/', video_info
['launchDate'], u
'launch date'))
117 upload_date
= datetime
.datetime
.fromtimestamp(timestamp_ms
// 1000)
120 'title': video_info
['title'],
122 'thumbnail': video_info
['imageUrl'],
123 'upload_date': upload_date
.strftime('%Y%m%d'),
124 'uploader': video_info
['mainArtists'][0]['artistName'],
125 'duration': video_info
['duration'],