]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nbc.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import find_xpath_attr
, compat_str
 
  10 class NBCIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)' 
  14         'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', 
  15         'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', 
  19             'title': 'I Am a Firefighter', 
  20             'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', 
  24     def _real_extract(self
, url
): 
  25         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  26         video_id 
= mobj
.group('id') 
  27         webpage 
= self
._download
_webpage
(url
, video_id
) 
  28         theplatform_url 
= self
._search
_regex
('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage
, 'theplatform url') 
  29         if theplatform_url
.startswith('//'): 
  30             theplatform_url 
= 'http:' + theplatform_url
 
  31         return self
.url_result(theplatform_url
) 
  34 class NBCNewsIE(InfoExtractor
): 
  35     _VALID_URL 
= r
'''(?x)https?://www\.nbcnews\.com/ 
  36         ((video/.+?/(?P<id>\d+))| 
  37         (feature/[^/]+/(?P<title>.+))) 
  42             'url': 'http://www.nbcnews.com/video/nbc-news/52753292', 
  43             'md5': '47abaac93c6eaf9ad37ee6c4463a5179', 
  47                 'title': 'Crew emerges after four-month Mars food study', 
  48                 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', 
  52             'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236', 
  53             'md5': 'b2421750c9f260783721d898f4c42063', 
  57                 'title': 'How Twitter Reacted To The Snowden Interview', 
  58                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', 
  60             'add_ie': ['ThePlatform'], 
  64     def _real_extract(self
, url
): 
  65         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  66         video_id 
= mobj
.group('id') 
  67         if video_id 
is not None: 
  68             all_info 
= self
._download
_xml
('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
, video_id
) 
  69             info 
= all_info
.find('video') 
  73                 'title': info
.find('headline').text
, 
  75                 'url': find_xpath_attr(info
, 'media', 'type', 'flashVideo').text
, 
  76                 'description': compat_str(info
.find('caption').text
), 
  77                 'thumbnail': find_xpath_attr(info
, 'media', 'type', 'thumbnail').text
, 
  80             # "feature" pages use theplatform.com 
  81             title 
= mobj
.group('title') 
  82             webpage 
= self
._download
_webpage
(url
, title
) 
  83             bootstrap_json 
= self
._search
_regex
( 
  84                 r
'var bootstrapJson = ({.+})\s*$', webpage
, 'bootstrap json', 
  86             bootstrap 
= json
.loads(bootstrap_json
) 
  87             info 
= bootstrap
['results'][0]['video'] 
  88             playlist_url 
= info
['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI' 
  90             all_videos 
= self
._download
_json
(playlist_url
, title
)['videos'] 
  91             # The response contains additional videos 
  92             info 
= next(v 
for v 
in all_videos 
if v
['mpxId'] == mpxid
) 
  96                 # We get the best quality video 
  97                 'url': info
['videoAssets'][-1]['publicUrl'], 
  98                 'ie_key': 'ThePlatform',