]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nbc.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  17 class NBCIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' 
  22             'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', 
  23             # md5 checksum is not stable 
  27                 'title': 'I Am a Firefighter', 
  28                 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', 
  32             'url': 'http://www.nbc.com/the-tonight-show/episodes/176', 
  36                 'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen', 
  37                 'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.', 
  39             'skip': 'Only works from US', 
  43     def _real_extract(self
, url
): 
  44         video_id 
= self
._match
_id
(url
) 
  45         webpage 
= self
._download
_webpage
(url
, video_id
) 
  46         theplatform_url 
= self
._search
_regex
( 
  47             '(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', 
  48             webpage
, 'theplatform url').replace('_no_endcard', '') 
  49         if theplatform_url
.startswith('//'): 
  50             theplatform_url 
= 'http:' + theplatform_url
 
  51         return self
.url_result(theplatform_url
) 
  54 class NBCNewsIE(InfoExtractor
): 
  55     _VALID_URL 
= r
'''(?x)https?://www\.nbcnews\.com/ 
  56         ((video/.+?/(?P<id>\d+))| 
  57         (feature/[^/]+/(?P<title>.+))) 
  62             'url': 'http://www.nbcnews.com/video/nbc-news/52753292', 
  63             'md5': '47abaac93c6eaf9ad37ee6c4463a5179', 
  67                 'title': 'Crew emerges after four-month Mars food study', 
  68                 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', 
  72             'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236', 
  73             'md5': 'b2421750c9f260783721d898f4c42063', 
  77                 'title': 'How Twitter Reacted To The Snowden Interview', 
  78                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', 
  80             'add_ie': ['ThePlatform'], 
  83             'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156', 
  84             'md5': 'fdbf39ab73a72df5896b6234ff98518a', 
  88                 'title': 'FULL EPISODE: Family Business', 
  89                 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', 
  94     def _real_extract(self
, url
): 
  95         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  96         video_id 
= mobj
.group('id') 
  97         if video_id 
is not None: 
  98             all_info 
= self
._download
_xml
('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
, video_id
) 
  99             info 
= all_info
.find('video') 
 103                 'title': info
.find('headline').text
, 
 105                 'url': find_xpath_attr(info
, 'media', 'type', 'flashVideo').text
, 
 106                 'description': compat_str(info
.find('caption').text
), 
 107                 'thumbnail': find_xpath_attr(info
, 'media', 'type', 'thumbnail').text
, 
 110             # "feature" pages use theplatform.com 
 111             title 
= mobj
.group('title') 
 112             webpage 
= self
._download
_webpage
(url
, title
) 
 113             bootstrap_json 
= self
._search
_regex
( 
 114                 r
'var bootstrapJson = ({.+})\s*$', webpage
, 'bootstrap json', 
 116             bootstrap 
= json
.loads(bootstrap_json
) 
 117             info 
= bootstrap
['results'][0]['video'] 
 118             mpxid 
= info
['mpxId'] 
 121                 info
['fallbackPlaylistUrl'], 
 122                 info
['associatedPlaylistUrl'], 
 125             for base_url 
in base_urls
: 
 128                 playlist_url 
= base_url 
+ '?form=MPXNBCNewsAPI' 
 131                     all_videos 
= self
._download
_json
(playlist_url
, title
) 
 132                 except ExtractorError 
as ee
: 
 133                     if isinstance(ee
.cause
, compat_HTTPError
): 
 137                 if not all_videos 
or 'videos' not in all_videos
: 
 141                     info 
= next(v 
for v 
in all_videos
['videos'] if v
['mpxId'] == mpxid
) 
 143                 except StopIteration: 
 147                 raise ExtractorError('Could not find video in playlists') 
 151                 # We get the best quality video 
 152                 'url': info
['videoAssets'][-1]['publicUrl'], 
 153                 'ie_key': 'ThePlatform',