X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/961c212c4f97846f00004b37e8dbd94b124a2f99..a497d0e55172891fd4925626374a7afdd811e00f:/youtube_dl/extractor/vevo.py diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 49a249a..e458ac9 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -1,43 +1,131 @@ +from __future__ import unicode_literals + import re -import json +import xml.etree.ElementTree +import datetime from .common import InfoExtractor from ..utils import ( + compat_HTTPError, ExtractorError, ) + class VevoIE(InfoExtractor): """ - Accecps urls from vevo.com or in the format 'vevo:{id}' + Accepts urls from vevo.com or in the format 'vevo:{id}' (currently used by MTVIE) """ - _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P.*)$' + _VALID_URL = r'''(?x) + (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?| + https?://cache\.vevo\.com/m/html/embed\.html\?video=| + https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| + vevo:) + (?P[^&?#]+)''' + _TESTS = [{ + 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', + 'file': 'GB1101300280.mp4', + "md5": "06bea460acb744eab74a9d7dcb4bfd61", + 'info_dict': { + "upload_date": "20130624", + "uploader": "Hurts", + "title": "Somebody to Die For", + "duration": 230.12, + "width": 1920, + "height": 1080, + } + }] + _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' + + def _formats_from_json(self, video_info): + last_version = {'version': -1} + for version in video_info['videoVersions']: + # These are the HTTP downloads, other types are for different manifests + if version['sourceType'] == 2: + if version['version'] > last_version['version']: + last_version = version + if last_version['version'] == -1: + raise ExtractorError('Unable to extract last version of the video') + + renditions = xml.etree.ElementTree.fromstring(last_version['data']) + formats = [] + # Already sorted from worst to best quality + for rend in renditions.findall('rendition'): + attr = rend.attrib + format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr + formats.append({ + 'url': attr['url'], + 'format_id': attr['name'], + 'format_note': format_note, + 'height': int(attr['frameheight']), + 'width': int(attr['frameWidth']), + }) + return formats + + def _formats_from_smil(self, smil_xml): + formats = [] + smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8')) + els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') + for el in els: + src = el.attrib['src'] + m = re.match(r'''(?xi) + (?P[a-z0-9]+): + (?P + [/a-z0-9]+ # The directory and main part of the URL + _(?P[0-9]+)k + _(?P[0-9]+)x(?P[0-9]+) + _(?P[a-z0-9]+) + _(?P[0-9]+) + _(?P[a-z0-9]+) + _(?P[0-9]+) + \.[a-z0-9]+ # File extension + )''', src) + if not m: + continue + + format_url = self._SMIL_BASE_URL + m.group('path') + formats.append({ + 'url': format_url, + 'format_id': 'SMIL_' + m.group('cbr'), + 'vcodec': m.group('vcodec'), + 'acodec': m.group('acodec'), + 'vbr': int(m.group('vbr')), + 'abr': int(m.group('abr')), + 'ext': m.group('ext'), + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + return formats def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - json_url = 'http://www.vevo.com/data/video/%s' % video_id - base_url = 'http://smil.lvl3.vevo.com' - videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower()) - info_json = self._download_webpage(json_url, video_id, u'Downloading json info') - links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls') - - self.report_extraction(video_id) - video_info = json.loads(info_json) - m_urls = list(re.finditer(r'