]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/slideshare.py
1 from __future__
import unicode_literals
6 from . common
import InfoExtractor
13 class SlideshareIE ( InfoExtractor
):
14 _VALID_URL
= r
'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
17 'url' : 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity' ,
21 'title' : 'Managing Scale and Complexity' ,
22 'description' : 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.' ,
26 def _real_extract ( self
, url
):
27 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
28 page_title
= mobj
. group ( 'title' )
29 webpage
= self
._ download
_ webpage
( url
, page_title
)
30 slideshare_obj
= self
._ search
_ regex
(
31 r
'var slideshare_object = ({.*?}); var user_info =' ,
32 webpage
, 'slideshare object' )
33 info
= json
. loads ( slideshare_obj
)
34 if info
[ 'slideshow' ][ 'type' ] != 'video' :
35 raise ExtractorError ( 'Webpage type is " %s ": only video extraction is supported for Slideshare' % info
[ 'slideshow' ][ 'type' ], expected
= True )
38 bucket
= info
[ 'jsplayer' ][ 'video_bucket' ]
39 ext
= info
[ 'jsplayer' ][ 'video_extension' ]
40 video_url
= compat_urlparse
. urljoin ( bucket
, doc
+ '-SD.' + ext
)
41 description
= self
._ html
_ search
_ regex
(
42 r
'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>' , webpage
,
43 'description' , fatal
= False )
47 'id' : info
[ 'slideshow' ][ 'id' ],
48 'title' : info
[ 'slideshow' ][ 'title' ],
51 'thumbnail' : info
[ 'slideshow' ][ 'pin_image_url' ],
52 'description' : description
,