X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/3ae74f711947d73bf6627bf312edeec41cec85c3..5dafebb045625934d6c679b05132c61c5f9793c5:/youtube_dl/extractor/vevo.py diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 70408c4..152fef4 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -1,53 +1,303 @@ +from __future__ import unicode_literals + import re -import json from .common import InfoExtractor +from ..compat import compat_etree_fromstring from ..utils import ( ExtractorError, + int_or_none, + sanitized_Request, + parse_iso8601, ) + class VevoIE(InfoExtractor): - """ + ''' Accepts urls from vevo.com or in the format 'vevo:{id}' - (currently used by MTVIE) - """ - _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P.*?)(\?|$)' - _TEST = { - u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', - u'file': u'GB1101300280.mp4', - u'md5': u'06bea460acb744eab74a9d7dcb4bfd61', - u'info_dict': { - u"upload_date": u"20130624", - u"uploader": u"Hurts", - u"title": u"Somebody to Die For" - } + (currently used by MTVIE and MySpaceIE) + ''' + _VALID_URL = r'''(?x) + (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| + https?://cache\.vevo\.com/m/html/embed\.html\?video=| + https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| + vevo:) + (?P[^&?#]+)''' + + _TESTS = [{ + 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', + 'md5': '95ee28ee45e70130e3ab02b0f579ae23', + 'info_dict': { + 'id': 'GB1101300280', + 'ext': 'mp4', + 'title': 'Somebody to Die For', + 'upload_date': '20130624', + 'uploader': 'Hurts', + 'timestamp': 1372057200, + }, + }, { + 'note': 'v3 SMIL format', + 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', + 'md5': 'f6ab09b034f8c22969020b042e5ac7fc', + 'info_dict': { + 'id': 'USUV71302923', + 'ext': 'mp4', + 'title': 'I Wish I Could Break Your Heart', + 'upload_date': '20140219', + 'uploader': 'Cassadee Pope', + 'timestamp': 1392796919, + }, + }, { + 'note': 'Age-limited video', + 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', + 'info_dict': { + 'id': 'USRV81300282', + 'ext': 'mp4', + 'title': 'Tunnel Vision (Explicit)', + 'upload_date': '20130703', + 'age_limit': 18, + 'uploader': 'Justin Timberlake', + 'timestamp': 1372888800, + }, + }, { + 'note': 'No video_info', + 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', + 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0', + 'info_dict': { + 'id': 'USUV71503000', + 'ext': 'mp4', + 'title': 'Till I Die', + 'upload_date': '20151207', + 'age_limit': 18, + 'uploader': 'K Camp', + 'timestamp': 1449468000, + }, + }] + _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' + _SOURCE_TYPES = { + 0: 'youtube', + 1: 'brightcove', + 2: 'http', + 3: 'hls_ios', + 4: 'hls', + 5: 'smil', # http + 7: 'f4m_cc', + 8: 'f4m_ak', + 9: 'f4m_l3', + 10: 'ism', + 13: 'smil', # rtmp + 18: 'dash', } + _VERSIONS = { + 0: 'youtube', # only in AuthenticateVideo videoVersions + 1: 'level3', + 2: 'akamai', + 3: 'level3', + 4: 'amazon', + } + + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): + formats = [] + els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') + for el in els: + src = el.attrib['src'] + m = re.match(r'''(?xi) + (?P[a-z0-9]+): + (?P + [/a-z0-9]+ # The directory and main part of the URL + _(?P[0-9]+)k + _(?P[0-9]+)x(?P[0-9]+) + _(?P[a-z0-9]+) + _(?P[0-9]+) + _(?P[a-z0-9]+) + _(?P[0-9]+) + \.[a-z0-9]+ # File extension + )''', src) + if not m: + continue + + format_url = self._SMIL_BASE_URL + m.group('path') + formats.append({ + 'url': format_url, + 'format_id': 'smil_' + m.group('tbr'), + 'vcodec': m.group('vcodec'), + 'acodec': m.group('acodec'), + 'tbr': int(m.group('tbr')), + 'vbr': int(m.group('vbr')), + 'abr': int(m.group('abr')), + 'ext': m.group('ext'), + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + return formats + + def _initialize_api(self, video_id): + req = sanitized_Request( + 'http://www.vevo.com/auth', data=b'') + webpage = self._download_webpage( + req, None, + note='Retrieving oauth token', + errnote='Unable to retrieve oauth token') + + if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: + raise ExtractorError( + '%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True) + + auth_info = self._parse_json(webpage, video_id) + self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token'] + + def _call_api(self, path, video_id, note, errnote, fatal=True): + return self._download_json(self._api_url_template % path, video_id, note, errnote) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - json_url = 'http://www.vevo.com/data/video/%s' % video_id - base_url = 'http://smil.lvl3.vevo.com' - videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower()) - info_json = self._download_webpage(json_url, video_id, u'Downloading json info') - links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls') - - self.report_extraction(video_id) - video_info = json.loads(info_json) - m_urls = list(re.finditer(r'