]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nerdist.py
c6dc34be4e92b204b4930980039b2475864be154
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
13 class NerdistIE(InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
16 'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
17 'md5': '3698ed582931b90d9e81e02e26e89f23',
19 'display_id': 'exclusive-which-dc-characters-w',
22 'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
23 'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
24 'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
25 'uploader': 'Eric Diaz',
26 'upload_date': '20150202',
27 'timestamp': 1422892808,
31 def _real_extract(self
, url
):
32 display_id
= self
._match
_id
(url
)
33 webpage
= self
._download
_webpage
(url
, display_id
)
35 video_id
= self
._search
_regex
(
36 r
'''(?x)<script\s+(?:type="text/javascript"\s+)?
37 src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
39 timestamp
= parse_iso8601(self
._html
_search
_meta
(
40 'shareaholic:article_published_time', webpage
, 'upload date'))
41 uploader
= self
._html
_search
_meta
(
42 'shareaholic:article_author_name', webpage
, 'article author')
44 doc
= self
._download
_xml
(
45 'http://content.nerdist.com/jw6/%s.xml' % video_id
, video_id
)
46 video_info
= doc
.find('.//item')
47 title
= xpath_text(video_info
, './title', fatal
=True)
48 description
= xpath_text(video_info
, './description')
49 thumbnail
= xpath_text(
50 video_info
, './{http://rss.jwpcdn.com/}image', 'thumbnail')
53 for source
in video_info
.findall('./{http://rss.jwpcdn.com/}source'):
54 vurl
= source
.attrib
['file']
55 ext
= determine_ext(vurl
)
57 formats
.extend(self
._extract
_m
3u8_formats
(
58 vurl
, video_id
, entry_protocol
='m3u8_native', ext
='mp4',
61 formats
.extend(self
._extract
_smil
_formats
(
62 vurl
, video_id
, fatal
=False
69 self
._sort
_formats
(formats
)
73 'display_id': display_id
,
75 'description': description
,
76 'thumbnail': thumbnail
,
77 'timestamp': timestamp
,