]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mlb.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  12 class MLBIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)' 
  16             'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea', 
  17             'md5': 'ff56a598c2cf411a9a38a69709e97079', 
  21                 'title': "Ackley's spectacular catch", 
  22                 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0', 
  24                 'timestamp': 1405980600, 
  25                 'upload_date': '20140721', 
  26                 'thumbnail': 're:^https?://.*\.jpg$', 
  30             'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', 
  31             'md5': 'd9c022c10d21f849f49c05ae12a8a7e9', 
  35                 'title': 'Stanton prepares for Derby', 
  36                 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57', 
  38                 'timestamp': 1405105800, 
  39                 'upload_date': '20140711', 
  40                 'thumbnail': 're:^https?://.*\.jpg$', 
  44             'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby', 
  45             'md5': '0e6e73d509321e142409b695eadd541f', 
  49                 'title': 'Cespedes repeats as Derby champ', 
  50                 'description': 'md5:08df253ce265d4cf6fb09f581fafad07', 
  52                 'timestamp': 1405399936, 
  53                 'upload_date': '20140715', 
  54                 'thumbnail': 're:^https?://.*\.jpg$', 
  58             'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance', 
  59             'md5': 'b8fd237347b844365d74ea61d4245967', 
  63                 'title': 'Bautista on Home Run Derby', 
  64                 'description': 'md5:b80b34031143d0986dddc64a8839f0fb', 
  66                 'timestamp': 1405390722, 
  67                 'upload_date': '20140715', 
  68                 'thumbnail': 're:^https?://.*\.jpg$', 
  72             'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb', 
  73             'only_matching': True, 
  76             'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553', 
  77             'only_matching': True, 
  80             'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553', 
  81             'only_matching': True, 
  85     def _real_extract(self
, url
): 
  86         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  87         video_id 
= mobj
.group('id') 
  89         detail 
= self
._download
_xml
( 
  90             'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml' 
  91             % (video_id
[-3], video_id
[-2], video_id
[-1], video_id
), video_id
) 
  93         title 
= detail
.find('./headline').text
 
  94         description 
= detail
.find('./big-blurb').text
 
  95         duration 
= parse_duration(detail
.find('./duration').text
) 
  96         timestamp 
= parse_iso8601(detail
.attrib
['date'][:-5]) 
  99             'url': thumbnail
.text
, 
 100         } for thumbnail 
in detail
.findall('./thumbnailScenarios/thumbnailScenario')] 
 103         for media_url 
in detail
.findall('./url'): 
 104             playback_scenario 
= media_url
.attrib
['playback_scenario'] 
 106                 'url': media_url
.text
, 
 107                 'format_id': playback_scenario
, 
 109             m 
= re
.search(r
'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario
) 
 112                     'vbr': int(m
.group('vbr')) * 1000, 
 113                     'width': int(m
.group('width')), 
 114                     'height': int(m
.group('height')), 
 118         self
._sort
_formats
(formats
) 
 123             'description': description
, 
 124             'duration': duration
, 
 125             'timestamp': timestamp
, 
 127             'thumbnails': thumbnails
,