]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/espn.py
127c69b2eb668b85236eccfaccc63dcb7dd9d714
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from .once 
import OnceIE
 
   7 from ..compat 
import compat_str
 
  21                                     (?:(?:\w+\.)+)?espn\.go| 
  26                                         video/(?:clip|iframe/twitter)| 
  35                             (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ 
  41         'url': 'http://espn.go.com/video/clip?id=10365079', 
  45             'title': '30 for 30 Shorts: Judging Jewell', 
  46             'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f', 
  47             'timestamp': 1390936111, 
  48             'upload_date': '20140128', 
  51             'skip_download': True, 
  54         'url': 'https://broadband.espn.go.com/video/clip?id=18910086', 
  58             'title': 'Kyrie spins around defender for two', 
  59             'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b', 
  60             'timestamp': 1489539155, 
  61             'upload_date': '20170315', 
  64             'skip_download': True, 
  66         'expected_warnings': ['Unable to download f4m manifest'], 
  68         'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672', 
  69         'only_matching': True, 
  71         'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774', 
  72         'only_matching': True, 
  74         'url': 'http://www.espn.com/watch/player?id=19141491', 
  75         'only_matching': True, 
  77         'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875', 
  78         'only_matching': True, 
  80         'url': 'http://www.espn.com/watch/player/_/id/19141491', 
  81         'only_matching': True, 
  83         'url': 'http://www.espn.com/video/clip?id=10365079', 
  84         'only_matching': True, 
  86         'url': 'http://www.espn.com/video/clip/_/id/17989860', 
  87         'only_matching': True, 
  89         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 
  90         'only_matching': True, 
  92         'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls', 
  93         'only_matching': True, 
  95         'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', 
  96         'only_matching': True, 
  99     def _real_extract(self
, url
): 
 100         video_id 
= self
._match
_id
(url
) 
 102         clip 
= self
._download
_json
( 
 103             'http://api-app.espn.com/v1/video/clips/%s' % video_id
, 
 104             video_id
)['videos'][0] 
 106         title 
= clip
['headline'] 
 111         def traverse_source(source
, base_source_id
=None): 
 112             for source_id
, source 
in source
.items(): 
 113                 if source_id 
== 'alert': 
 115                 elif isinstance(source
, compat_str
): 
 116                     extract_source(source
, base_source_id
) 
 117                 elif isinstance(source
, dict): 
 120                         '%s-%s' % (base_source_id
, source_id
) 
 121                         if base_source_id 
else source_id
) 
 123         def extract_source(source_url
, source_id
=None): 
 124             if source_url 
in format_urls
: 
 126             format_urls
.add(source_url
) 
 127             ext 
= determine_ext(source_url
) 
 128             if OnceIE
.suitable(source_url
): 
 129                 formats
.extend(self
._extract
_once
_formats
(source_url
)) 
 131                 formats
.extend(self
._extract
_smil
_formats
( 
 132                     source_url
, video_id
, fatal
=False)) 
 134                 formats
.extend(self
._extract
_f
4m
_formats
( 
 135                     source_url
, video_id
, f4m_id
=source_id
, fatal
=False)) 
 137                 formats
.extend(self
._extract
_m
3u8_formats
( 
 138                     source_url
, video_id
, 'mp4', entry_protocol
='m3u8_native', 
 139                     m3u8_id
=source_id
, fatal
=False)) 
 143                     'format_id': source_id
, 
 145                 mobj 
= re
.search(r
'(\d+)p(\d+)_(\d+)k\.', source_url
) 
 148                         'height': int(mobj
.group(1)), 
 149                         'fps': int(mobj
.group(2)), 
 150                         'tbr': int(mobj
.group(3)), 
 152                 if source_id 
== 'mezzanine': 
 156         links 
= clip
.get('links', {}) 
 157         traverse_source(links
.get('source', {})) 
 158         traverse_source(links
.get('mobile', {})) 
 159         self
._sort
_formats
(formats
) 
 161         description 
= clip
.get('caption') or clip
.get('description') 
 162         thumbnail 
= clip
.get('thumbnail') 
 163         duration 
= int_or_none(clip
.get('duration')) 
 164         timestamp 
= unified_timestamp(clip
.get('originalPublishDate')) 
 169             'description': description
, 
 170             'thumbnail': thumbnail
, 
 171             'timestamp': timestamp
, 
 172             'duration': duration
, 
 177 class ESPNArticleIE(InfoExtractor
): 
 178     _VALID_URL 
= r
'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' 
 180         'url': 'http://espn.go.com/nba/recap?gameId=400793786', 
 181         'only_matching': True, 
 183         'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge', 
 184         'only_matching': True, 
 186         'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings', 
 187         'only_matching': True, 
 189         'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 
 190         'only_matching': True, 
 194     def suitable(cls
, url
): 
 195         return False if ESPNIE
.suitable(url
) else super(ESPNArticleIE
, cls
).suitable(url
) 
 197     def _real_extract(self
, url
): 
 198         video_id 
= self
._match
_id
(url
) 
 200         webpage 
= self
._download
_webpage
(url
, video_id
) 
 202         video_id 
= self
._search
_regex
( 
 203             r
'class=(["\']).*?video
-play
-button
.*?\
1[^
>]+data
-id=["\'](?P<id>\d+)', 
 204             webpage, 'video id', group='id') 
 206         return self.url_result( 
 207             'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) 
 210 class FiveThirtyEightIE(InfoExtractor): 
 211     _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)' 
 213         'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/', 
 217             'title': 'FiveThirtyEight: The Raiders can still make the playoffs', 
 218             'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.', 
 219             'timestamp': 1513960621, 
 220             'upload_date': '20171222', 
 223             'skip_download': True, 
 225         'expected_warnings': ['Unable to download f4m manifest'], 
 228     def _real_extract(self, url): 
 229         video_id = self._match_id(url) 
 231         webpage = self._download_webpage(url, video_id) 
 233         video_id = self._search_regex( 
 234             r'data-video-id=["\'](?P
<id>\d
+)', 
 235             webpage, 'video 
id', group='id') 
 237         return self.url_result( 
 238             'http
://espn
.go
.com
/video
/clip?
id=%s' % video_id, ESPNIE.ie_key())