]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nerdist.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  13 class NerdistIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)' 
  16         'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w', 
  17         'md5': '3698ed582931b90d9e81e02e26e89f23', 
  19             'display_id': 'exclusive-which-dc-characters-w', 
  22             'title': 'Your TEEN TITANS Revealed! Who\'s on the show?', 
  23             'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$', 
  24             'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!', 
  25             'uploader': 'Eric Diaz', 
  26             'upload_date': '20150202', 
  27             'timestamp': 1422892808, 
  31     def _real_extract(self
, url
): 
  32         display_id 
= self
._match
_id
(url
) 
  33         webpage 
= self
._download
_webpage
(url
, display_id
) 
  35         video_id 
= self
._search
_regex
( 
  36             r
'''(?x)<script\s+(?:type="text/javascript"\s+)? 
  37                 src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''', 
  39         timestamp 
= parse_iso8601(self
._html
_search
_meta
( 
  40             'shareaholic:article_published_time', webpage
, 'upload date')) 
  41         uploader 
= self
._html
_search
_meta
( 
  42             'shareaholic:article_author_name', webpage
, 'article author') 
  44         doc 
= self
._download
_xml
( 
  45             'http://content.nerdist.com/jw6/%s.xml' % video_id
, video_id
) 
  46         video_info 
= doc
.find('.//item') 
  47         title 
= xpath_text(video_info
, './title', fatal
=True) 
  48         description 
= xpath_text(video_info
, './description') 
  49         thumbnail 
= xpath_text( 
  50             video_info
, './{http://rss.jwpcdn.com/}image', 'thumbnail') 
  53         for source 
in video_info
.findall('./{http://rss.jwpcdn.com/}source'): 
  54             vurl 
= source
.attrib
['file'] 
  55             ext 
= determine_ext(vurl
) 
  57                 formats
.extend(self
._extract
_m
3u8_formats
( 
  58                     vurl
, video_id
, entry_protocol
='m3u8_native', ext
='mp4', 
  61                 formats
.extend(self
._extract
_smil
_formats
( 
  62                     vurl
, video_id
, fatal
=False 
  69         self
._sort
_formats
(formats
) 
  73             'display_id': display_id
, 
  75             'description': description
, 
  76             'thumbnail': thumbnail
, 
  77             'timestamp': timestamp
,