Merge tag 'upstream/2014.11.21'

[youtubedl] / youtube_dl / extractor / nbc.py
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py

index aa34665d1669f32ab31a02618c58ef9c4b130fe2..7b5449031ebd2b7245d452c04ae50dfaf970d6ca 100644 (file)
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -4,7 +4,11 @@ import re
  import json
  
  from .common import InfoExtractor
-from ..utils import find_xpath_attr, compat_str
+from ..utils import (
+    compat_str,
+    ExtractorError,
+    find_xpath_attr,
+)
  
  
  class NBCIE(InfoExtractor):
@@ -12,9 +16,9 @@ class NBCIE(InfoExtractor):
  
      _TEST = {
          'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
-        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
+        # md5 checksum is not stable
          'info_dict': {
-            'id': 'u1RInQZRN7QJ',
+            'id': 'bTmnLCvIbaaH',
              'ext': 'flv',
              'title': 'I Am a Firefighter',
              'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
@@ -22,8 +26,7 @@ class NBCIE(InfoExtractor):
      }
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
          theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')
          if theplatform_url.startswith('//'):
@@ -53,7 +56,7 @@ class NBCNewsIE(InfoExtractor):
              'md5': 'b2421750c9f260783721d898f4c42063',
              'info_dict': {
                  'id': 'I1wpAI_zmhsQ',
-                'ext': 'flv',
+                'ext': 'mp4',
                  'title': 'How Twitter Reacted To The Snowden Interview',
                  'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
              },
@@ -85,11 +88,27 @@ class NBCNewsIE(InfoExtractor):
                  flags=re.MULTILINE)
              bootstrap = json.loads(bootstrap_json)
              info = bootstrap['results'][0]['video']
-            playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI'
              mpxid = info['mpxId']
-            all_videos = self._download_json(playlist_url, title)['videos']
-            # The response contains additional videos
-            info = next(v for v in all_videos if v['mpxId'] == mpxid)
+
+            base_urls = [
+                info['fallbackPlaylistUrl'],
+                info['associatedPlaylistUrl'],
+            ]
+
+            for base_url in base_urls:
+                if not base_url:
+                    continue
+                playlist_url = base_url + '?form=MPXNBCNewsAPI'
+                all_videos = self._download_json(playlist_url, title)['videos']
+                # The response contains additional videos; select ours by mpxId
+                try:
+                    info = next(v for v in all_videos if v['mpxId'] == mpxid)
+                    break
+                except StopIteration:
+                    continue
+            else:
+                # Loop ended without break: no playlist lists this mpxId
+                raise ExtractorError('Could not find video in playlists')
  
              return {
                  '_type': 'url',