]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/slideshare.py 
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   6  from  . common 
import  InfoExtractor
 
  16  class  SlideshareIE ( InfoExtractor
):  
  17      _VALID_URL 
=  r
'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'  
  20          'url' :  'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity' ,  
  24              'title' :  'Managing Scale and Complexity' ,  
  25              'description' :  'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.' ,  
  29      def  _real_extract ( self
,  url
):  
  30          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
  31          page_title 
=  mobj
. group ( 'title' )  
  32          webpage 
=  self
._ download
_ webpage
( url
,  page_title
)  
  33          slideshare_obj 
=  self
._ search
_ regex
(  
  34              r
'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);' ,  
  35              webpage
,  'slideshare object' )  
  36          info 
=  json
. loads ( slideshare_obj
)  
  37          if  info
[ 'slideshow' ][ 'type' ] !=  'video' :  
  38              raise  ExtractorError ( 'Webpage type is " %s ": only video extraction is supported for Slideshare'  %  info
[ 'slideshow' ][ 'type' ],  expected
= True )  
  41          bucket 
=  info
[ 'jsplayer' ][ 'video_bucket' ]  
  42          ext 
=  info
[ 'jsplayer' ][ 'video_extension' ]  
  43          video_url 
=  compat_urlparse
. urljoin ( bucket
,  doc 
+  '-SD.'  +  ext
)  
  44          description 
=  get_element_by_id ( 'slideshow-description-paragraph' ,  webpage
)  or  self
._ html
_ search
_ regex
(  
  45              r
'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>' ,  webpage
,  
  46              'description' ,  fatal
= False )  
  50              'id' :  info
[ 'slideshow' ][ 'id' ],  
  51              'title' :  info
[ 'slideshow' ][ 'title' ],  
  54              'thumbnail' :  info
[ 'slideshow' ][ 'pin_image_url' ],  
  55              'description' :  description
. strip ()  if  description 
else None ,