Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/metacritic.py
New upstream version 2019.01.16
[youtubedl] / youtube_dl / extractor / metacritic.py
index 449138b569f80c97154ea79ac874617efc484a3f..7d468d78bab45ac4a83bd8aa531dfd67b42c6eb6 100644 (file)
@@ -1,31 +1,44 @@
+from __future__ import unicode_literals
+
 import re
 import re
-import xml.etree.ElementTree
-import operator
 
 from .common import InfoExtractor
 
 from .common import InfoExtractor
+from ..utils import (
+    fix_xml_ampersands,
+)
 
 
 class MetacriticIE(InfoExtractor):
 
 
 class MetacriticIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
-
-    _TEST = {
-        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
-        u'file': u'3698222.mp4',
-        u'info_dict': {
-            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
-            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
-            u'duration': 221,
+    _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
+        'info_dict': {
+            'id': '3698222',
+            'ext': 'mp4',
+            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
+            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
+            'duration': 221,
+        },
+        'skip': 'Not providing trailers anymore',
+    }, {
+        'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
+        'info_dict': {
+            'id': '5740315',
+            'ext': 'mp4',
+            'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
+            'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
+            'duration': 114,
         },
         },
-    }
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         # The xml is not well formatted, there are raw '&'
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         # The xml is not well formatted, there are raw '&'
-        info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
-            video_id, u'Downloading info xml').replace('&', '&amp;')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
+                                  video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
 
         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
         formats = []
 
         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
         formats = []
@@ -36,20 +49,17 @@ class MetacriticIE(InfoExtractor):
                 'url': video_url,
                 'ext': 'mp4',
                 'format_id': rate_str,
                 'url': video_url,
                 'ext': 'mp4',
                 'format_id': rate_str,
-                'rate': int(rate_str),
+                'tbr': int(rate_str),
             })
             })
-        formats.sort(key=operator.itemgetter('rate'))
+        self._sort_formats(formats)
 
         description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
 
         description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
-            webpage, u'description', flags=re.DOTALL)
+                                              webpage, 'description', flags=re.DOTALL)
 
 
-        info = {
+        return {
             'id': video_id,
             'title': clip.find('title').text,
             'formats': formats,
             'description': description,
             'duration': int(clip.find('duration').text),
         }
             'id': video_id,
             'title': clip.find('title').text,
             'formats': formats,
             'description': description,
             'duration': int(clip.find('duration').text),
         }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info