]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/voicerepublic.py
1 from __future__
import unicode_literals
3 from . common
import InfoExtractor
4 from .. compat
import compat_str
13 class VoiceRepublicIE ( InfoExtractor
):
14 _VALID_URL
= r
'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
16 'url' : 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state' ,
17 'md5' : 'b9174d651323f17783000876347116e3' ,
20 'display_id' : 'watching-the-watchers-building-a-sousveillance-state' ,
22 'title' : 'Watching the Watchers: Building a Sousveillance State' ,
23 'description' : 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.' ,
28 'url' : 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state' ,
29 'only_matching' : True ,
32 def _real_extract ( self
, url
):
33 display_id
= self
._ match
_ id
( url
)
35 webpage
= self
._ download
_ webpage
( url
, display_id
)
37 if '>Queued for processing, please stand by...<' in webpage
:
39 'Audio is still queued for processing' , expected
= True )
41 talk
= self
._ parse
_ json
( self
._ search
_ regex
(
42 r
'initialSnapshot\s*=\s*({.+?});' ,
43 webpage
, 'talk' ), display_id
)[ 'talk' ]
46 'url' : urljoin ( url
, talk_url
),
47 'format_id' : format_id
,
48 'ext' : determine_ext ( talk_url
) or format_id
,
50 } for format_id
, talk_url
in talk
[ 'media_links' ]. items ()]
51 self
._ sort
_ formats
( formats
)
54 'id' : compat_str ( talk
. get ( 'id' ) or display_id
),
55 'display_id' : display_id
,
57 'description' : talk
. get ( 'teaser' ),
58 'thumbnail' : talk
. get ( 'image_url' ),
59 'duration' : int_or_none ( talk
. get ( 'archived_duration' )),
60 'view_count' : int_or_none ( talk
. get ( 'play_count' )),