]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/voicerepublic.py 
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   5  from  . common 
import  InfoExtractor
 
  18  class  VoiceRepublicIE ( InfoExtractor
):  
  19      _VALID_URL 
=  r
'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'  
  21          'url' :  'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state' ,  
  22          'md5' :  'b9174d651323f17783000876347116e3' ,  
  25              'display_id' :  'watching-the-watchers-building-a-sousveillance-state' ,  
  27              'title' :  'Watching the Watchers: Building a Sousveillance State' ,  
  28              'description' :  'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.' ,  
  29              'thumbnail' :  r
're:^https?://.*\.(?:png|jpg)$' ,  
  34          'url' :  'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state' ,  
  35          'only_matching' :  True ,  
  38      def  _real_extract ( self
,  url
):  
  39          display_id 
=  self
._ match
_ id
( url
)  
  41          req 
=  sanitized_Request (  
  42              compat_urlparse
. urljoin ( url
,  '/talks/ %s '  %  display_id
))  
  43          # Older versions of Firefox get redirected to an "upgrade browser" page  
  44          req
. add_header ( 'User-Agent' ,  'youtube-dl' )  
  45          webpage 
=  self
._ download
_ webpage
( req
,  display_id
)  
  47          if  '>Queued for processing, please stand by...<'  in  webpage
:  
  49                  'Audio is still queued for processing' ,  expected
= True )  
  51          config 
=  self
._ search
_ regex
(  
  52              r
'(?s)return ({.+?});\s*\n' ,  webpage
,  
  54          data 
=  self
._ parse
_ json
( config
,  display_id
,  fatal
= False )  if  config 
else None  
  57              description 
=  data
. get ( 'teaser' )  
  58              talk_id 
=  compat_str ( data
. get ( 'talk_id' )  or  display_id
)  
  60              duration 
=  int_or_none ( talk
. get ( 'duration' ))  
  62                  'url' :  compat_urlparse
. urljoin ( url
,  talk_url
),  
  63                  'format_id' :  format_id
,  
  64                  'ext' :  determine_ext ( talk_url
)  or  format_id
,  
  66              }  for  format_id
,  talk_url 
in  talk
[ 'links' ]. items ()]  
  68              title 
=  self
._ og
_ search
_ title
( webpage
)  
  69              description 
=  self
._ html
_ search
_ regex
(  
  70                  r
"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>" ,  
  71                  webpage
,  'description' ,  fatal
= False )  
  72              talk_id 
=  self
._ search
_ regex
(  
  73                  [ r
"id='jc-(\d+)'" ,  r
"data-shareable-id='(\d+)'" ],  
  74                  webpage
,  'talk id' ,  default
= None )  or  display_id
 
  76              player 
=  self
._ search
_ regex
(  
  77                  r
"class='vr-player jp-jplayer'([^>]+)>" ,  webpage
,  'player' )  
  79                  'url' :  compat_urlparse
. urljoin ( url
,  talk_url
),  
  80                  'format_id' :  format_id
,  
  81                  'ext' :  determine_ext ( talk_url
)  or  format_id
,  
  83              }  for  format_id
,  talk_url 
in  re
. findall ( r
"data-([^=]+)='([^']+)'" ,  player
)]  
  84          self
._ sort
_ formats
( formats
)  
  86          thumbnail 
=  self
._ og
_ search
_ thumbnail
( webpage
)  
  87          view_count 
=  int_or_none ( self
._ search
_ regex
(  
  88              r
"class='play-count[^']*'>\s*(\d+) plays" ,  
  89              webpage
,  'play count' ,  fatal
= False ))  
  93              'display_id' :  display_id
,  
  95              'description' :  description
,  
  96              'thumbnail' :  thumbnail
,  
  98              'view_count' :  view_count
,