]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/slideshare.py
4 from . common
import InfoExtractor
11 class SlideshareIE ( InfoExtractor
):
12 _VALID_URL
= r
'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
15 u
'url' : u
'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity' ,
16 u
'file' : u
'25665706.mp4' ,
18 u
'title' : u
'Managing Scale and Complexity' ,
19 u
'description' : u
'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix' ,
23 def _real_extract ( self
, url
):
24 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
25 page_title
= mobj
. group ( 'title' )
26 webpage
= self
._ download
_ webpage
( url
, page_title
)
27 slideshare_obj
= self
._ search
_ regex
(
28 r
'var slideshare_object = ({.*?}); var user_info =' ,
29 webpage
, u
'slideshare object' )
30 info
= json
. loads ( slideshare_obj
)
31 if info
[ 'slideshow' ][ 'type' ] != u
'video' :
32 raise ExtractorError ( u
'Webpage type is " %s ": only video extraction is supported for Slideshare' % info
[ 'slideshow' ][ 'type' ], expected
= True )
35 bucket
= info
[ 'jsplayer' ][ 'video_bucket' ]
36 ext
= info
[ 'jsplayer' ][ 'video_extension' ]
37 video_url
= compat_urlparse
. urljoin ( bucket
, doc
+ '-SD.' + ext
)
41 'id' : info
[ 'slideshow' ][ 'id' ],
42 'title' : info
[ 'slideshow' ][ 'title' ],
45 'thumbnail' : info
[ 'slideshow' ][ 'pin_image_url' ],
46 'description' : self
._ og
_ search
_ description
( webpage
),