X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/63a6927374492ef47c8fd6de67d0760ace4dd0ed..22bdf6fb907cce994fbfe062879d2a991e597a3f:/youtube_dl/extractor/mtv.py?ds=sidebyside diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index ed11f52..228b42d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -1,17 +1,28 @@ +from __future__ import unicode_literals + import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( compat_urllib_parse, + compat_urllib_request, ExtractorError, + find_xpath_attr, + fix_xml_ampersands, + HEADRequest, + unescapeHTML, + url_basename, + RegexNotFoundError, ) + def _media_xml_tag(tag): return '{http://search.yahoo.com/mrss/}%s' % tag class MTVServicesInfoExtractor(InfoExtractor): + _MOBILE_TEMPLATE = None + @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -25,6 +36,9 @@ class MTVServicesInfoExtractor(InfoExtractor): base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' return base + m.group('finalid') + def _get_feed_url(self, uri): + return self._FEED_URL + def _get_thumbnail_url(self, uri, itemdoc): search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) thumb_node = itemdoc.find(search_path) @@ -33,10 +47,29 @@ class MTVServicesInfoExtractor(InfoExtractor): else: return thumb_node.attrib['url'] - def _extract_video_formats(self, metadataXml): - if '/error_country_block.swf' in metadataXml: - raise ExtractorError(u'This video is not available from your country.', expected=True) - mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8')) + def _extract_mobile_video_formats(self, mtvn_id): + webpage_url = self._MOBILE_TEMPLATE % mtvn_id + req = compat_urllib_request.Request(webpage_url) + # Otherwise we get a webpage that would execute some javascript + req.add_header('Youtubedl-user-agent', 'curl/7') + webpage = self._download_webpage(req, mtvn_id, + 'Downloading mobile page') + metrics_url = unescapeHTML(self._search_regex(r'.+?)(\?|/|$)' + + _TEST = { + # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/ + 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906', + 'md5': 'cb349b21a7897164cede95bd7bf3fbb9', + 'info_dict': { + 'id': '1043906', + 'ext': 'mp4', + 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds', + 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.', + }, + } + + def _get_feed_url(self, uri): + video_id = self._id_from_uri(uri) + site_id = uri.replace(video_id, '') + config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id) + config_doc = self._download_xml(config_url, video_id) + feed_node = config_doc.find('.//feed') + feed_url = feed_node.text.strip().split('?')[0] + return feed_url + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + mgid = mobj.group('mgid') + return self._get_videos_info(mgid) + class MTVIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)^https?:// @@ -101,25 +207,25 @@ class MTVIE(MTVServicesInfoExtractor): _TESTS = [ { - u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', - u'file': u'853555.mp4', - u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', - u'info_dict': { - u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', - u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', + 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', + 'file': '853555.mp4', + 'md5': '850f3f143316b1e71fa56a4edfd6e0f8', + 'info_dict': { + 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', + 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', }, }, { - u'add_ie': ['Vevo'], - u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', - u'file': u'USCJY1331283.mp4', - u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', - u'info_dict': { - u'title': u'Everything Has Changed', - u'upload_date': u'20130606', - u'uploader': u'Taylor Swift', + 'add_ie': ['Vevo'], + 'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', + 'file': 'USCJY1331283.mp4', + 'md5': '73b4e7fcadd88929292fe52c3ced8caf', + 'info_dict': { + 'title': 'Everything Has Changed', + 'upload_date': '20130606', + 'uploader': 'Taylor Swift', }, - u'skip': u'VEVO is only available in some countries', + 'skip': 'VEVO is only available in some countries', }, ] @@ -129,7 +235,7 @@ class MTVIE(MTVServicesInfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - uri = mobj.group('mgid') + uri = mobj.groupdict().get('mgid') if uri is None: webpage = self._download_webpage(url, video_id) @@ -138,8 +244,22 @@ class MTVIE(MTVServicesInfoExtractor): webpage, re.DOTALL) if m_vevo: vevo_id = m_vevo.group(1); - self.to_screen(u'Vevo video detected: %s' % vevo_id) + self.to_screen('Vevo video detected: %s' % vevo_id) return self.url_result('vevo:%s' % vevo_id, ie='Vevo') - uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri') + uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri') return self._get_videos_info(uri) + + +class MTVIggyIE(MTVServicesInfoExtractor): + IE_NAME = 'mtviggy.com' + _VALID_URL = r'https?://www\.mtviggy\.com/videos/.+' + _TEST = { + 'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/', + 'info_dict': { + 'id': '984696', + 'ext': 'mp4', + 'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet', + } + } + _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'