X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/af014acd27e0b471d5903630847eabb26437b46c..ecaee9d66905db1d5836d396c705d18d6e5f1f72:/youtube_dl/extractor/espn.py?ds=inline diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 7a74360..127c69b 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -1,6 +1,9 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor +from .once import OnceIE from ..compat import compat_str from ..utils import ( determine_ext, @@ -9,22 +12,27 @@ from ..utils import ( ) -class ESPNIE(InfoExtractor): +class ESPNIE(OnceIE): _VALID_URL = r'''(?x) https?:// (?: - (?:(?:\w+\.)+)?espn\.go| - (?:www\.)?espn - )\.com/ - (?: - (?: - video/clip| - watch/player - ) (?: - \?.*?\bid=| - /_/id/ - ) + (?: + (?:(?:\w+\.)+)?espn\.go| + (?:www\.)?espn + )\.com/ + (?: + (?: + video/(?:clip|iframe/twitter)| + watch/player + ) + (?: + .*?\?.*?\bid=| + /_/id/ + ) + ) + )| + (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ ) (?P\d+) ''' @@ -77,6 +85,15 @@ class ESPNIE(InfoExtractor): }, { 'url': 'http://www.espn.com/video/clip/_/id/17989860', 'only_matching': True, + }, { + 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', + 'only_matching': True, + }, { + 'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls', + 'only_matching': True, + }, { + 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', + 'only_matching': True, }] def _real_extract(self, url): @@ -93,7 +110,9 @@ class ESPNIE(InfoExtractor): def traverse_source(source, base_source_id=None): for source_id, source in source.items(): - if isinstance(source, compat_str): + if source_id == 'alert': + continue + elif isinstance(source, compat_str): extract_source(source, base_source_id) elif isinstance(source, dict): traverse_source( @@ -106,7 +125,9 @@ class ESPNIE(InfoExtractor): return format_urls.add(source_url) ext = determine_ext(source_url) - if ext == 'smil': + if OnceIE.suitable(source_url): + formats.extend(self._extract_once_formats(source_url)) + elif ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) elif ext == 'f4m': @@ -117,12 +138,24 @@ class ESPNIE(InfoExtractor): source_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=source_id, fatal=False)) else: - formats.append({ + f = { 'url': source_url, 'format_id': source_id, - }) - - traverse_source(clip['links']['source']) + } + mobj = re.search(r'(\d+)p(\d+)_(\d+)k\.', source_url) + if mobj: + f.update({ + 'height': int(mobj.group(1)), + 'fps': int(mobj.group(2)), + 'tbr': int(mobj.group(3)), + }) + if source_id == 'mezzanine': + f['preference'] = 1 + formats.append(f) + + links = clip.get('links', {}) + traverse_source(links.get('source', {})) + traverse_source(links.get('mobile', {})) self._sort_formats(formats) description = clip.get('caption') or clip.get('description') @@ -144,9 +177,6 @@ class ESPNIE(InfoExtractor): class ESPNArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' _TESTS = [{ - 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', - 'only_matching': True, - }, { 'url': 'http://espn.go.com/nba/recap?gameId=400793786', 'only_matching': True, }, { @@ -175,3 +205,34 @@ class ESPNArticleIE(InfoExtractor): return self.url_result( 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) + + +class FiveThirtyEightIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P[^/?#]+)' + _TEST = { + 'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', + 'info_dict': { + 'id': '21846851', + 'ext': 'mp4', + 'title': 'FiveThirtyEight: The Raiders can still make the playoffs', + 'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', + 'timestamp': 1513960621, + 'upload_date': '20171222', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + r'data-video-id=["\'](?P\d+)', + webpage, 'video id', group='id') + + return self.url_result( + 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())