]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nbc.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class NBCIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' 
  21             'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', 
  22             # md5 checksum is not stable 
  26                 'title': 'I Am a Firefighter', 
  27                 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', 
  31             'url': 'http://www.nbc.com/the-tonight-show/episodes/176', 
  35                 'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen', 
  36                 'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.', 
  38             'skip': 'Only works from US', 
  42     def _real_extract(self
, url
): 
  43         video_id 
= self
._match
_id
(url
) 
  44         webpage 
= self
._download
_webpage
(url
, video_id
) 
  45         theplatform_url 
= self
._search
_regex
( 
  46             '(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', 
  47             webpage
, 'theplatform url').replace('_no_endcard', '') 
  48         if theplatform_url
.startswith('//'): 
  49             theplatform_url 
= 'http:' + theplatform_url
 
  50         return self
.url_result(theplatform_url
) 
  53 class NBCNewsIE(InfoExtractor
): 
  54     _VALID_URL 
= r
'''(?x)https?://www\.nbcnews\.com/ 
  55         ((video/.+?/(?P<id>\d+))| 
  56         (feature/[^/]+/(?P<title>.+))) 
  61             'url': 'http://www.nbcnews.com/video/nbc-news/52753292', 
  62             'md5': '47abaac93c6eaf9ad37ee6c4463a5179', 
  66                 'title': 'Crew emerges after four-month Mars food study', 
  67                 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', 
  71             'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236', 
  72             'md5': 'b2421750c9f260783721d898f4c42063', 
  76                 'title': 'How Twitter Reacted To The Snowden Interview', 
  77                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', 
  79             'add_ie': ['ThePlatform'], 
  83     def _real_extract(self
, url
): 
  84         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  85         video_id 
= mobj
.group('id') 
  86         if video_id 
is not None: 
  87             all_info 
= self
._download
_xml
('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
, video_id
) 
  88             info 
= all_info
.find('video') 
  92                 'title': info
.find('headline').text
, 
  94                 'url': find_xpath_attr(info
, 'media', 'type', 'flashVideo').text
, 
  95                 'description': compat_str(info
.find('caption').text
), 
  96                 'thumbnail': find_xpath_attr(info
, 'media', 'type', 'thumbnail').text
, 
  99             # "feature" pages use theplatform.com 
 100             title 
= mobj
.group('title') 
 101             webpage 
= self
._download
_webpage
(url
, title
) 
 102             bootstrap_json 
= self
._search
_regex
( 
 103                 r
'var bootstrapJson = ({.+})\s*$', webpage
, 'bootstrap json', 
 105             bootstrap 
= json
.loads(bootstrap_json
) 
 106             info 
= bootstrap
['results'][0]['video'] 
 107             mpxid 
= info
['mpxId'] 
 110                 info
['fallbackPlaylistUrl'], 
 111                 info
['associatedPlaylistUrl'], 
 114             for base_url 
in base_urls
: 
 117                 playlist_url 
= base_url 
+ '?form=MPXNBCNewsAPI' 
 118                 all_videos 
= self
._download
_json
(playlist_url
, title
)['videos'] 
 121                     info 
= next(v 
for v 
in all_videos 
if v
['mpxId'] == mpxid
) 
 123                 except StopIteration: 
 127                 raise ExtractorError('Could not find video in playlists') 
 131                 # We get the best quality video 
 132                 'url': info
['videoAssets'][-1]['publicUrl'], 
 133                 'ie_key': 'ThePlatform',