]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/voicerepublic.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
18 class VoiceRepublicIE ( InfoExtractor
):
19 _VALID_URL
= r
'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
21 'url' : 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state' ,
22 'md5' : 'b9174d651323f17783000876347116e3' ,
25 'display_id' : 'watching-the-watchers-building-a-sousveillance-state' ,
27 'title' : 'Watching the Watchers: Building a Sousveillance State' ,
28 'description' : 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.' ,
29 'thumbnail' : 're:^https?://.*\.(?:png|jpg)$' ,
34 'url' : 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state' ,
35 'only_matching' : True ,
38 def _real_extract ( self
, url
):
39 display_id
= self
._ match
_ id
( url
)
41 req
= sanitized_Request (
42 compat_urlparse
. urljoin ( url
, '/talks/ %s ' % display_id
))
43 # Older versions of Firefox get redirected to an "upgrade browser" page
44 req
. add_header ( 'User-Agent' , 'youtube-dl' )
45 webpage
= self
._ download
_ webpage
( req
, display_id
)
47 if '>Queued for processing, please stand by...<' in webpage
:
49 'Audio is still queued for processing' , expected
= True )
51 config
= self
._ search
_ regex
(
52 r
'(?s)return ({.+?});\s*\n' , webpage
,
54 data
= self
._ parse
_ json
( config
, display_id
, fatal
= False ) if config
else None
57 description
= data
. get ( 'teaser' )
58 talk_id
= compat_str ( data
. get ( 'talk_id' ) or display_id
)
60 duration
= int_or_none ( talk
. get ( 'duration' ))
62 'url' : compat_urlparse
. urljoin ( url
, talk_url
),
63 'format_id' : format_id
,
64 'ext' : determine_ext ( talk_url
) or format_id
,
66 } for format_id
, talk_url
in talk
[ 'links' ]. items ()]
68 title
= self
._ og
_ search
_ title
( webpage
)
69 description
= self
._ html
_ search
_ regex
(
70 r
"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>" ,
71 webpage
, 'description' , fatal
= False )
72 talk_id
= self
._ search
_ regex
(
73 [ r
"id='jc-(\d+)'" , r
"data-shareable-id='(\d+)'" ],
74 webpage
, 'talk id' , default
= None ) or display_id
76 player
= self
._ search
_ regex
(
77 r
"class='vr-player jp-jplayer'([^>]+)>" , webpage
, 'player' )
79 'url' : compat_urlparse
. urljoin ( url
, talk_url
),
80 'format_id' : format_id
,
81 'ext' : determine_ext ( talk_url
) or format_id
,
83 } for format_id
, talk_url
in re
. findall ( r
"data-([^=]+)='([^']+)'" , player
)]
84 self
._ sort
_ formats
( formats
)
86 thumbnail
= self
._ og
_ search
_ thumbnail
( webpage
)
87 view_count
= int_or_none ( self
._ search
_ regex
(
88 r
"class='play-count[^']*'>\s*(\d+) plays" ,
89 webpage
, 'play count' , fatal
= False ))
93 'display_id' : display_id
,
95 'description' : description
,
96 'thumbnail' : thumbnail
,
98 'view_count' : view_count
,