]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/voicerepublic.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
17 class VoiceRepublicIE(InfoExtractor
):
18 _VALID_URL
= r
'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
20 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
21 'md5': '0554a24d1657915aa8e8f84e15dc9353',
24 'display_id': 'watching-the-watchers-building-a-sousveillance-state',
26 'title': 'Watching the Watchers: Building a Sousveillance State',
27 'description': 'md5:715ba964958afa2398df615809cfecb1',
28 'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
33 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
34 'only_matching': True,
37 def _real_extract(self
, url
):
38 display_id
= self
._match
_id
(url
)
40 req
= compat_urllib_request
.Request(
41 compat_urlparse
.urljoin(url
, '/talks/%s' % display_id
))
42 # Older versions of Firefox get redirected to an "upgrade browser" page
43 req
.add_header('User-Agent', 'youtube-dl')
44 webpage
= self
._download
_webpage
(req
, display_id
)
46 if '>Queued for processing, please stand by...<' in webpage
:
48 'Audio is still queued for processing', expected
=True)
50 config
= self
._search
_regex
(
51 r
'(?s)return ({.+?});\s*\n', webpage
,
53 data
= self
._parse
_json
(config
, display_id
, fatal
=False) if config
else None
56 description
= data
.get('teaser')
57 talk_id
= data
.get('talk_id') or display_id
59 duration
= int_or_none(talk
.get('duration'))
61 'url': compat_urlparse
.urljoin(url
, talk_url
),
62 'format_id': format_id
,
63 'ext': determine_ext(talk_url
) or format_id
,
65 } for format_id
, talk_url
in talk
['links'].items()]
67 title
= self
._og
_search
_title
(webpage
)
68 description
= self
._html
_search
_regex
(
69 r
"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
70 webpage
, 'description', fatal
=False)
71 talk_id
= self
._search
_regex
(
72 [r
"id='jc-(\d+)'", r
"data-shareable-id='(\d+)'"],
73 webpage
, 'talk id', default
=None) or display_id
75 player
= self
._search
_regex
(
76 r
"class='vr-player jp-jplayer'([^>]+)>", webpage
, 'player')
78 'url': compat_urlparse
.urljoin(url
, talk_url
),
79 'format_id': format_id
,
80 'ext': determine_ext(talk_url
) or format_id
,
82 } for format_id
, talk_url
in re
.findall(r
"data-([^=]+)='([^']+)'", player
)]
83 self
._sort
_formats
(formats
)
85 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
86 view_count
= int_or_none(self
._search
_regex
(
87 r
"class='play-count[^']*'>\s*(\d+) plays",
88 webpage
, 'play count', fatal
=False))
92 'display_id': display_id
,
94 'description': description
,
95 'thumbnail': thumbnail
,
97 'view_count': view_count
,