Update upstream source from tag 'upstream/2019.09.01'

[youtubedl] / youtube_dl / extractor / espn.py
diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py

index 7a743606836517736e858411e626a5e2a502890e..6cf05e6da8204c1e7cb6f7790fcb24104c8341cd 100644 (file)
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@@ -1,6 +1,9 @@
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
+from .once import OnceIE
  from ..compat import compat_str
  from ..utils import (
      determine_ext,
@@ -9,22 +12,28 @@ from ..utils import (
  )
  
  
-class ESPNIE(InfoExtractor):
+class ESPNIE(OnceIE):
      _VALID_URL = r'''(?x)
                      https?://
-                        (?:
-                            (?:(?:\w+\.)+)?espn\.go|
-                            (?:www\.)?espn
-                        )\.com/
                          (?:
                              (?:
-                                video/clip|
-                                watch/player
-                            )
-                            (?:
-                                \?.*?\bid=|
-                                /_/id/
-                            )
+                                (?:
+                                    (?:(?:\w+\.)+)?espn\.go|
+                                    (?:www\.)?espn
+                                )\.com/
+                                (?:
+                                    (?:
+                                        video/(?:clip|iframe/twitter)|
+                                        watch/player
+                                    )
+                                    (?:
+                                        .*?\?.*?\bid=|
+                                        /_/id/
+                                    )|
+                                    [^/]+/video/
+                                )
+                            )|
+                            (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
                          )
                          (?P<id>\d+)
                      '''
@@ -77,6 +86,18 @@ class ESPNIE(InfoExtractor):
      }, {
          'url': 'http://www.espn.com/video/clip/_/id/17989860',
          'only_matching': True,
+    }, {
+        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
@@ -93,7 +114,9 @@ class ESPNIE(InfoExtractor):
  
          def traverse_source(source, base_source_id=None):
              for source_id, source in source.items():
-                if isinstance(source, compat_str):
+                if source_id == 'alert':
+                    continue
+                elif isinstance(source, compat_str):
                      extract_source(source, base_source_id)
                  elif isinstance(source, dict):
                      traverse_source(
@@ -106,7 +129,9 @@ class ESPNIE(InfoExtractor):
                  return
              format_urls.add(source_url)
              ext = determine_ext(source_url)
-            if ext == 'smil':
+            if OnceIE.suitable(source_url):
+                formats.extend(self._extract_once_formats(source_url))
+            elif ext == 'smil':
                  formats.extend(self._extract_smil_formats(
                      source_url, video_id, fatal=False))
              elif ext == 'f4m':
@@ -117,12 +142,24 @@ class ESPNIE(InfoExtractor):
                      source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                      m3u8_id=source_id, fatal=False))
              else:
-                formats.append({
+                f = {
                      'url': source_url,
                      'format_id': source_id,
-                })
-
-        traverse_source(clip['links']['source'])
+                }
+                mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url)
+                if mobj:
+                    f.update({
+                        'height': int(mobj.group(1)),
+                        'fps': int(mobj.group(2)),
+                        'tbr': int(mobj.group(3)),
+                    })
+                if source_id == 'mezzanine':
+                    f['preference'] = 1
+                formats.append(f)
+
+        links = clip.get('links', {})
+        traverse_source(links.get('source', {}))
+        traverse_source(links.get('mobile', {}))
          self._sort_formats(formats)
  
          description = clip.get('caption') or clip.get('description')
@@ -144,9 +181,6 @@ class ESPNIE(InfoExtractor):
  class ESPNArticleIE(InfoExtractor):
      _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
      _TESTS = [{
-        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
-        'only_matching': True,
-    }, {
          'url': 'http://espn.go.com/nba/recap?gameId=400793786',
          'only_matching': True,
      }, {
@@ -175,3 +209,30 @@ class ESPNArticleIE(InfoExtractor):
  
          return self.url_result(
              'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
+
+
+class FiveThirtyEightIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
+        'info_dict': {
+            'id': '56032156',
+            'ext': 'flv',
+            'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
+            'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        embed_url = self._search_regex(
+            r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)',
+            webpage, 'embed url')
+
+        return self.url_result(embed_url, 'AbcNewsVideo')