]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/nbc.py
Merge tag 'upstream/2014.11.21'
[youtubedl] / youtube_dl / extractor / nbc.py
index aa34665d1669f32ab31a02618c58ef9c4b130fe2..7b5449031ebd2b7245d452c04ae50dfaf970d6ca 100644 (file)
@@ -4,7 +4,11 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import find_xpath_attr, compat_str
+from ..utils import (
+    compat_str,
+    ExtractorError,
+    find_xpath_attr,
+)
 
 
 class NBCIE(InfoExtractor):
@@ -12,9 +16,9 @@ class NBCIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
-        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
+        # md5 checksum is not stable
         'info_dict': {
-            'id': 'u1RInQZRN7QJ',
+            'id': 'bTmnLCvIbaaH',
             'ext': 'flv',
             'title': 'I Am a Firefighter',
             'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
@@ -22,8 +26,7 @@ class NBCIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')
         if theplatform_url.startswith('//'):
@@ -53,7 +56,7 @@ class NBCNewsIE(InfoExtractor):
             'md5': 'b2421750c9f260783721d898f4c42063',
             'info_dict': {
                 'id': 'I1wpAI_zmhsQ',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'How Twitter Reacted To The Snowden Interview',
                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
             },
@@ -85,11 +88,27 @@ class NBCNewsIE(InfoExtractor):
                 flags=re.MULTILINE)
             bootstrap = json.loads(bootstrap_json)
             info = bootstrap['results'][0]['video']
-            playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI'
             mpxid = info['mpxId']
-            all_videos = self._download_json(playlist_url, title)['videos']
-            # The response contains additional videos
-            info = next(v for v in all_videos if v['mpxId'] == mpxid)
+
+            base_urls = [
+                info['fallbackPlaylistUrl'],
+                info['associatedPlaylistUrl'],
+            ]
+
+            for base_url in base_urls:
+                if not base_url:
+                    continue
+                playlist_url = base_url + '?form=MPXNBCNewsAPI'
+                all_videos = self._download_json(playlist_url, title)['videos']
+
+                try:
+                    info = next(v for v in all_videos if v['mpxId'] == mpxid)
+                    break
+                except StopIteration:
+                    continue
+
+            if info is None:
+                raise ExtractorError('Could not find video in playlists')
 
             return {
                 '_type': 'url',