]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/voicerepublic.py 
   1  from  __future__ 
import  unicode_literals
   5  from  . common 
import  InfoExtractor
  18  class  VoiceRepublicIE ( InfoExtractor
):   19      _VALID_URL 
=  r
'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'   21          'url' :  'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state' ,   22          'md5' :  'b9174d651323f17783000876347116e3' ,   25              'display_id' :  'watching-the-watchers-building-a-sousveillance-state' ,   27              'title' :  'Watching the Watchers: Building a Sousveillance State' ,   28              'description' :  'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.' ,   29              'thumbnail' :  're:^https?://.*\.(?:png|jpg)$' ,   34          'url' :  'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state' ,   35          'only_matching' :  True ,   38      def  _real_extract ( self
,  url
):   39          display_id 
=  self
._ match
_ id
( url
)   41          req 
=  sanitized_Request (   42              compat_urlparse
. urljoin ( url
,  '/talks/ %s '  %  display_id
))   43          # Older versions of Firefox get redirected to an "upgrade browser" page   44          req
. add_header ( 'User-Agent' ,  'youtube-dl' )   45          webpage 
=  self
._ download
_ webpage
( req
,  display_id
)   47          if  '>Queued for processing, please stand by...<'  in  webpage
:   49                  'Audio is still queued for processing' ,  expected
= True )   51          config 
=  self
._ search
_ regex
(   52              r
'(?s)return ({.+?});\s*\n' ,  webpage
,   54          data 
=  self
._ parse
_ json
( config
,  display_id
,  fatal
= False )  if  config 
else None   57              description 
=  data
. get ( 'teaser' )   58              talk_id 
=  compat_str ( data
. get ( 'talk_id' )  or  display_id
)   60              duration 
=  int_or_none ( talk
. get ( 'duration' ))   62                  'url' :  compat_urlparse
. urljoin ( url
,  talk_url
),   63                  'format_id' :  format_id
,   64                  'ext' :  determine_ext ( talk_url
)  or  format_id
,   66              }  for  format_id
,  talk_url 
in  talk
[ 'links' ]. items ()]   68              title 
=  self
._ og
_ search
_ title
( webpage
)   69              description 
=  self
._ html
_ search
_ regex
(   70                  r
"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>" ,   71                  webpage
,  'description' ,  fatal
= False )   72              talk_id 
=  self
._ search
_ regex
(   73                  [ r
"id='jc-(\d+)'" ,  r
"data-shareable-id='(\d+)'" ],   74                  webpage
,  'talk id' ,  default
= None )  or  display_id
  76              player 
=  self
._ search
_ regex
(   77                  r
"class='vr-player jp-jplayer'([^>]+)>" ,  webpage
,  'player' )   79                  'url' :  compat_urlparse
. urljoin ( url
,  talk_url
),   80                  'format_id' :  format_id
,   81                  'ext' :  determine_ext ( talk_url
)  or  format_id
,   83              }  for  format_id
,  talk_url 
in  re
. findall ( r
"data-([^=]+)='([^']+)'" ,  player
)]   84          self
._ sort
_ formats
( formats
)   86          thumbnail 
=  self
._ og
_ search
_ thumbnail
( webpage
)   87          view_count 
=  int_or_none ( self
._ search
_ regex
(   88              r
"class='play-count[^']*'>\s*(\d+) plays" ,   89              webpage
,  'play count' ,  fatal
= False ))   93              'display_id' :  display_id
,   95              'description' :  description
,   96              'thumbnail' :  thumbnail
,   98              'view_count' :  view_count
,