Merge tag 'upstream/2014.11.21'

[youtubedl] / youtube_dl / extractor / nhl.py
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py

index 2edd806a3f6aa12792f3c8d8065a57fd2e2e70a1..82af6e33098eaa9018c6f58b93b4e8b4c9cff399 100644 (file)
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
  import re
  import json
  
@@ -5,7 +7,6 @@ from .common import InfoExtractor
  from ..utils import (
      compat_urlparse,
      compat_urllib_parse,
-    determine_ext,
      unified_strdate,
  )
  
@@ -20,21 +21,23 @@ class NHLBaseInfoExtractor(InfoExtractor):
          self.report_extraction(video_id)
  
          initial_video_url = info['publishPoint']
-        data = compat_urllib_parse.urlencode({
-            'type': 'fvod',
-            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
-        })
-        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
-        path_doc = self._download_xml(path_url, video_id,
-            u'Downloading final video url')
-        video_url = path_doc.find('path').text
+        if info['formats'] == '1':
+            data = compat_urllib_parse.urlencode({
+                'type': 'fvod',
+                'path': initial_video_url.replace('.mp4', '_sd.mp4'),
+            })
+            path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
+            path_doc = self._download_xml(
+                path_url, video_id, 'Downloading final video url')
+            video_url = path_doc.find('path').text
+        else:
+           video_url = initial_video_url
  
          join = compat_urlparse.urljoin
          return {
              'id': video_id,
              'title': info['name'],
              'url': video_url,
-            'ext': determine_ext(video_url),
              'description': info['description'],
              'duration': int(info['duration']),
              'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
@@ -43,41 +46,57 @@ class NHLBaseInfoExtractor(InfoExtractor):
  
  
  class NHLIE(NHLBaseInfoExtractor):
-    IE_NAME = u'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
-
-    _TEST = {
-        u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
-        u'file': u'453614.mp4',
-        u'info_dict': {
-            u'title': u'Quick clip: Weise 4-3 goal vs Flames',
-            u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
-            u'duration': 18,
-            u'upload_date': u'20131006',
+    IE_NAME = 'nhl.com'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)'
+
+    _TESTS = [{
+        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
+        'md5': 'db704a4ea09e8d3988c85e36cc892d09',
+        'info_dict': {
+            'id': '453614',
+            'ext': 'mp4',
+            'title': 'Quick clip: Weise 4-3 goal vs Flames',
+            'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
+            'duration': 18,
+            'upload_date': '20131006',
          },
-    }
+    }, {
+        'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
+        'md5': 'd22e82bc592f52d37d24b03531ee9696',
+        'info_dict': {
+            'id': '2014020024-628-h',
+            'ext': 'mp4',
+            'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
+            'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
+            'duration': 0,
+            'upload_date': '20141011',
+        },
+    }, {
+        'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
          json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
-        info_json = self._download_webpage(json_url, video_id,
-            u'Downloading info json')
-        info_json = self._fix_json(info_json)
-        info = json.loads(info_json)[0]
-        return self._extract_video(info)
+        data = self._download_json(
+            json_url, video_id, transform_source=self._fix_json)
+        return self._extract_video(data[0])
  
  
  class NHLVideocenterIE(NHLBaseInfoExtractor):
-    IE_NAME = u'nhl.com:videocenter'
-    IE_DESC = u'NHL videocenter category'
-    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
-
-    @classmethod
-    def suitable(cls, url):
-        if NHLIE.suitable(url):
-            return False
-        return super(NHLVideocenterIE, cls).suitable(url)
+    IE_NAME = 'nhl.com:videocenter'
+    IE_DESC = 'NHL videocenter category'
+    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
+    _TEST = {
+        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
+        'info_dict': {
+            'id': '999',
+            'title': 'Highlights',
+        },
+        'playlist_count': 12,
+    }
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -86,10 +105,10 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
          cat_id = self._search_regex(
              [r'var defaultCatId = "(.+?)";',
               r'{statusIndex:0,index:0,.*?id:(.*?),'],
-            webpage, u'category id')
+            webpage, 'category id')
          playlist_title = self._html_search_regex(
              r'tab0"[^>]*?>(.*?)</td>',
-            webpage, u'playlist title', flags=re.DOTALL).lower().capitalize()
+            webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
  
          data = compat_urllib_parse.urlencode({
              'cid': cat_id,
@@ -104,7 +123,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
          response = self._fix_json(response)
          if not response.strip():
              self._downloader.report_warning(u'Got an empty reponse, trying '
-                                            u'adding the "newvideos" parameter')
+                                            'adding the "newvideos" parameter')
              response = self._download_webpage(request_url + '&newvideos=true',
                  playlist_title)
              response = self._fix_json(response)
@@ -114,5 +133,5 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
              '_type': 'playlist',
              'title': playlist_title,
              'id': cat_id,
-            'entries': [self._extract_video(i) for i in videos],
+            'entries': [self._extract_video(v) for v in videos],
          }