Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/espn.py
d/p/disable-autoupdate-mechanism.patch: Extend to clean up errant import and README...
[youtubedl] / youtube_dl / extractor / espn.py
index 7a743606836517736e858411e626a5e2a502890e..6cf05e6da8204c1e7cb6f7790fcb24104c8341cd 100644 (file)
@@ -1,6 +1,9 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from .once import OnceIE
 from ..compat import compat_str
 from ..utils import (
     determine_ext,
@@ -9,22 +12,28 @@ from ..utils import (
 )
 
 
-class ESPNIE(InfoExtractor):
+class ESPNIE(OnceIE):
     _VALID_URL = r'''(?x)
                     https?://
-                        (?:
-                            (?:(?:\w+\.)+)?espn\.go|
-                            (?:www\.)?espn
-                        )\.com/
                         (?:
                             (?:
-                                video/clip|
-                                watch/player
-                            )
-                            (?:
-                                \?.*?\bid=|
-                                /_/id/
-                            )
+                                (?:
+                                    (?:(?:\w+\.)+)?espn\.go|
+                                    (?:www\.)?espn
+                                )\.com/
+                                (?:
+                                    (?:
+                                        video/(?:clip|iframe/twitter)|
+                                        watch/player
+                                    )
+                                    (?:
+                                        .*?\?.*?\bid=|
+                                        /_/id/
+                                    )|
+                                    [^/]+/video/
+                                )
+                            )|
+                            (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
                         )
                         (?P<id>\d+)
                     '''
@@ -77,6 +86,18 @@ class ESPNIE(InfoExtractor):
     }, {
         'url': 'http://www.espn.com/video/clip/_/id/17989860',
         'only_matching': True,
+    }, {
+        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -93,7 +114,9 @@ class ESPNIE(InfoExtractor):
 
         def traverse_source(source, base_source_id=None):
             for source_id, source in source.items():
-                if isinstance(source, compat_str):
+                if source_id == 'alert':
+                    continue
+                elif isinstance(source, compat_str):
                     extract_source(source, base_source_id)
                 elif isinstance(source, dict):
                     traverse_source(
@@ -106,7 +129,9 @@ class ESPNIE(InfoExtractor):
                 return
             format_urls.add(source_url)
             ext = determine_ext(source_url)
-            if ext == 'smil':
+            if OnceIE.suitable(source_url):
+                formats.extend(self._extract_once_formats(source_url))
+            elif ext == 'smil':
                 formats.extend(self._extract_smil_formats(
                     source_url, video_id, fatal=False))
             elif ext == 'f4m':
@@ -117,12 +142,24 @@ class ESPNIE(InfoExtractor):
                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id=source_id, fatal=False))
             else:
-                formats.append({
+                f = {
                     'url': source_url,
                     'format_id': source_id,
-                })
-
-        traverse_source(clip['links']['source'])
+                }
+                mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
+                if mobj:
+                    f.update({
+                        'height': int(mobj.group(1)),
+                        'fps': int(mobj.group(2)),
+                        'tbr': int(mobj.group(3)),
+                    })
+                if source_id == 'mezzanine':
+                    f['preference'] = 1
+                formats.append(f)
+
+        links = clip.get('links', {})
+        traverse_source(links.get('source', {}))
+        traverse_source(links.get('mobile', {}))
         self._sort_formats(formats)
 
         description = clip.get('caption') or clip.get('description')
@@ -144,9 +181,6 @@ class ESPNIE(InfoExtractor):
 class ESPNArticleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
     _TESTS = [{
-        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
-        'only_matching': True,
-    }, {
         'url': 'http://espn.go.com/nba/recap?gameId=400793786',
         'only_matching': True,
     }, {
@@ -175,3 +209,30 @@ class ESPNArticleIE(InfoExtractor):
 
         return self.url_result(
             'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
+
+
+class FiveThirtyEightIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
+        'info_dict': {
+            'id': '56032156',
+            'ext': 'flv',
+            'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
+            'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        embed_url = self._search_regex(
+            r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)',
+            webpage, 'embed url')
+
+        return self.url_result(embed_url, 'AbcNewsVideo')