X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/33cd347759d6d999325ebf3c69b7ed5692c343b2..779bc665c512f2802f1436a30b6b09ee7ad83e02:/youtube_dl/extractor/mtv.py?ds=sidebyside diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8f95657..e5ca41b 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -1,43 +1,27 @@ +from __future__ import unicode_literals + import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( compat_urllib_parse, + compat_urllib_request, ExtractorError, + find_xpath_attr, + fix_xml_ampersands, + HEADRequest, + unescapeHTML, + url_basename, + RegexNotFoundError, ) + def _media_xml_tag(tag): return '{http://search.yahoo.com/mrss/}%s' % tag -class MTVIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P[0-9]+)/[^/]+$' - - _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' - - _TESTS = [ - { - u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', - u'file': u'853555.mp4', - u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', - u'info_dict': { - u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', - u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', - }, - }, - { - u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', - u'file': u'USCJY1331283.mp4', - u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', - u'info_dict': { - u'title': u'Everything Has Changed', - u'upload_date': u'20130606', - u'uploader': u'Taylor Swift', - }, - u'skip': u'VEVO is only available in some countries', - }, - ] +class MTVServicesInfoExtractor(InfoExtractor): + _MOBILE_TEMPLATE = None @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -47,75 +31,197 @@ class MTVIE(InfoExtractor): def _transform_rtmp_url(rtmp_video_url): m = re.match(r'^rtmpe?://.*?/(?Pgsp\..+?/.*)$', rtmp_video_url) if not m: - raise ExtractorError(u'Cannot transform RTMP url') + return rtmp_video_url base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' return base + m.group('finalid') def _get_thumbnail_url(self, uri, itemdoc): - return 'http://mtv.mtvnimages.com/uri/' + uri - - def _extract_video_url(self, metadataXml): - if '/error_country_block.swf' in metadataXml: - raise ExtractorError(u'This video is not available from your country.', expected=True) - mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8')) - renditions = mdoc.findall('.//rendition') - - # For now, always pick the highest quality. - rendition = renditions[-1] - - try: - _,_,ext = rendition.attrib['type'].partition('/') - format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate'] - rtmp_video_url = rendition.find('./src').text - except KeyError: - raise ExtractorError('Invalid rendition field.') - video_url = self._transform_rtmp_url(rtmp_video_url) - return {'ext': ext, 'url': video_url, 'format': format} + search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) + thumb_node = itemdoc.find(search_path) + if thumb_node is None: + return None + else: + return thumb_node.attrib['url'] + + def _extract_mobile_video_formats(self, mtvn_id): + webpage_url = self._MOBILE_TEMPLATE % mtvn_id + req = compat_urllib_request.Request(webpage_url) + # Otherwise we get a webpage that would execute some javascript + req.add_header('Youtubedl-user-agent', 'curl/7') + webpage = self._download_webpage(req, mtvn_id, + 'Downloading mobile page') + metrics_url = unescapeHTML(self._search_regex(r'[0-9]+)/[^/]+$| + m\.mtv\.com/videos/video\.rbml\?.*?id=(?P[^&]+))''' - webpage = self._download_webpage(url, video_id) + _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' - # Some videos come from Vevo.com - m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', - webpage, re.DOTALL) - if m_vevo: - vevo_id = m_vevo.group(1); - self.to_screen(u'Vevo video detected: %s' % vevo_id) - return self.url_result('vevo:%s' % vevo_id, ie='Vevo') + _TESTS = [ + { + 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', + 'file': '853555.mp4', + 'md5': '850f3f143316b1e71fa56a4edfd6e0f8', + 'info_dict': { + 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', + 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', + }, + }, + { + 'add_ie': ['Vevo'], + 'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', + 'file': 'USCJY1331283.mp4', + 'md5': '73b4e7fcadd88929292fe52c3ced8caf', + 'info_dict': { + 'title': 'Everything Has Changed', + 'upload_date': '20130606', + 'uploader': 'Taylor Swift', + }, + 'skip': 'VEVO is only available in some countries', + }, + ] + + def _get_thumbnail_url(self, uri, itemdoc): + return 'http://mtv.mtvnimages.com/uri/' + uri - uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri') + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + uri = mobj.groupdict().get('mgid') + if uri is None: + webpage = self._download_webpage(url, video_id) + + # Some videos come from Vevo.com + m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', + webpage, re.DOTALL) + if m_vevo: + vevo_id = m_vevo.group(1); + self.to_screen('Vevo video detected: %s' % vevo_id) + return self.url_result('vevo:%s' % vevo_id, ie='Vevo') + + uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri') return self._get_videos_info(uri) + + +class MTVIggyIE(MTVServicesInfoExtractor): + IE_NAME = 'mtviggy.com' + _VALID_URL = r'https?://www\.mtviggy\.com/videos/.+' + _TEST = { + 'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/', + 'info_dict': { + 'id': '984696', + 'ext': 'mp4', + 'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet', + } + } + _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'