debian/control: Remove mplayer2 from recommends. (Closes: #841187)

[youtubedl] / youtube_dl / extractor / mtv.py
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py

index 2f455680ebba41074513f7dd33b76e5c269cb142..03351917e71cdfbfb98ecb329eecad9500b288e4 100644 (file)
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -4,7 +4,6 @@ import re
  
  from .common import InfoExtractor
  from ..compat import (
-    compat_urllib_parse_urlencode,
      compat_str,
      compat_xpath,
  )
@@ -14,12 +13,14 @@ from ..utils import (
      fix_xml_ampersands,
      float_or_none,
      HEADRequest,
+    NO_DEFAULT,
+    RegexNotFoundError,
      sanitized_Request,
      strip_or_none,
      timeconvert,
      unescapeHTML,
+    update_url_query,
      url_basename,
-    RegexNotFoundError,
      xpath_text,
  )
  
@@ -36,6 +37,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
      def _id_from_uri(uri):
          return uri.split(':')[-1]
  
+    @staticmethod
+    def _remove_template_parameter(url):
+        # Remove the templates, like &device={device}
+        return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
+
      # This was originally implemented for ComedyCentral, but it also works here
      @classmethod
      def _transform_rtmp_url(cls, rtmp_video_url):
@@ -117,9 +123,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
          video_id = self._id_from_uri(uri)
          self.report_extraction(video_id)
          content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
-        mediagen_url = content_el.attrib['url']
-        # Remove the templates, like &device={device}
-        mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
+        mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
          if 'acceptMethods' not in mediagen_url:
              mediagen_url += '&' if '?' in mediagen_url else '?'
              mediagen_url += 'acceptMethods=fms'
@@ -178,12 +182,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
          data = {'uri': uri}
          if self._LANG:
              data['lang'] = self._LANG
-        return compat_urllib_parse_urlencode(data)
+        return data
  
      def _get_videos_info(self, uri):
          video_id = self._id_from_uri(uri)
          feed_url = self._get_feed_url(uri)
-        info_url = feed_url + '?' + self._get_feed_query(uri)
+        info_url = update_url_query(feed_url, self._get_feed_query(uri))
          return self._get_videos_info_from_url(info_url, video_id)
  
      def _get_videos_info_from_url(self, url, video_id):
@@ -198,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
              [self._get_video_info(item) for item in idoc.findall('.//item')],
              playlist_title=title, playlist_description=description)
  
-    def _extract_mgid(self, webpage):
+    def _extract_mgid(self, webpage, default=NO_DEFAULT):
          try:
              # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
              # or http://media.mtvnservices.com/{mgid}
@@ -218,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
              sm4_embed = self._html_search_meta(
                  'sm4:video:embed', webpage, 'sm4 embed', default='')
              mgid = self._search_regex(
-                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
+                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default)
          return mgid
  
      def _real_extract(self, url):
@@ -256,13 +260,9 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
  
      def _get_feed_url(self, uri):
          video_id = self._id_from_uri(uri)
-        site_id = uri.replace(video_id, '')
-        config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
-                      'context4/context5/config.xml'.format(site_id))
-        config_doc = self._download_xml(config_url, video_id)
-        feed_node = config_doc.find('.//feed')
-        feed_url = feed_node.text.strip().split('?')[0]
-        return feed_url
+        config = self._download_json(
+            'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
+        return self._remove_template_parameter(config['feedWithQueryParams'])
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -271,6 +271,29 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
  
  
  class MTVIE(MTVServicesInfoExtractor):
+    IE_NAME = 'mtv'
+    _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)'
+    _FEED_URL = 'http://www.mtv.com/feeds/mrss/'
+
+    _TESTS = [{
+        'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer',
+        'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
+        'info_dict': {
+            'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
+            'ext': 'mp4',
+            'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
+            'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
+            'timestamp': 1468846800,
+            'upload_date': '20160718',
+        },
+    }, {
+        'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
+        'only_matching': True,
+    }]
+
+
+class MTVVideoIE(MTVServicesInfoExtractor):
+    IE_NAME = 'mtv:video'
      _VALID_URL = r'''(?x)^https?://
          (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
             m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''