]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/voicerepublic.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
6 from ..compat
import compat_urlparse
15 class VoiceRepublicIE(InfoExtractor
):
16 _VALID_URL
= r
'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
18 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
19 'md5': '0554a24d1657915aa8e8f84e15dc9353',
22 'display_id': 'watching-the-watchers-building-a-sousveillance-state',
24 'title': 'Watching the Watchers: Building a Sousveillance State',
25 'description': 'md5:715ba964958afa2398df615809cfecb1',
26 'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
31 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
32 'only_matching': True,
35 def _real_extract(self
, url
):
36 display_id
= self
._match
_id
(url
)
38 req
= sanitized_Request(
39 compat_urlparse
.urljoin(url
, '/talks/%s' % display_id
))
40 # Older versions of Firefox get redirected to an "upgrade browser" page
41 req
.add_header('User-Agent', 'youtube-dl')
42 webpage
= self
._download
_webpage
(req
, display_id
)
44 if '>Queued for processing, please stand by...<' in webpage
:
46 'Audio is still queued for processing', expected
=True)
48 config
= self
._search
_regex
(
49 r
'(?s)return ({.+?});\s*\n', webpage
,
51 data
= self
._parse
_json
(config
, display_id
, fatal
=False) if config
else None
54 description
= data
.get('teaser')
55 talk_id
= data
.get('talk_id') or display_id
57 duration
= int_or_none(talk
.get('duration'))
59 'url': compat_urlparse
.urljoin(url
, talk_url
),
60 'format_id': format_id
,
61 'ext': determine_ext(talk_url
) or format_id
,
63 } for format_id
, talk_url
in talk
['links'].items()]
65 title
= self
._og
_search
_title
(webpage
)
66 description
= self
._html
_search
_regex
(
67 r
"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
68 webpage
, 'description', fatal
=False)
69 talk_id
= self
._search
_regex
(
70 [r
"id='jc-(\d+)'", r
"data-shareable-id='(\d+)'"],
71 webpage
, 'talk id', default
=None) or display_id
73 player
= self
._search
_regex
(
74 r
"class='vr-player jp-jplayer'([^>]+)>", webpage
, 'player')
76 'url': compat_urlparse
.urljoin(url
, talk_url
),
77 'format_id': format_id
,
78 'ext': determine_ext(talk_url
) or format_id
,
80 } for format_id
, talk_url
in re
.findall(r
"data-([^=]+)='([^']+)'", player
)]
81 self
._sort
_formats
(formats
)
83 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
84 view_count
= int_or_none(self
._search
_regex
(
85 r
"class='play-count[^']*'>\s*(\d+) plays",
86 webpage
, 'play count', fatal
=False))
90 'display_id': display_id
,
92 'description': description
,
93 'thumbnail': thumbnail
,
95 'view_count': view_count
,