]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/slideshare.py
1 from __future__
import unicode_literals
6 from . common
import InfoExtractor
15 class SlideshareIE ( InfoExtractor
):
16 _VALID_URL
= r
'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
19 'url' : 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity' ,
23 'title' : 'Managing Scale and Complexity' ,
24 'description' : 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.' ,
28 def _real_extract ( self
, url
):
29 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
30 page_title
= mobj
. group ( 'title' )
31 webpage
= self
._ download
_ webpage
( url
, page_title
)
32 slideshare_obj
= self
._ search
_ regex
(
33 r
'\$\.extend\(slideshare_object,\s*(\{.*?\})\);' ,
34 webpage
, 'slideshare object' )
35 info
= json
. loads ( slideshare_obj
)
36 if info
[ 'slideshow' ][ 'type' ] != 'video' :
37 raise ExtractorError ( 'Webpage type is " %s ": only video extraction is supported for Slideshare' % info
[ 'slideshow' ][ 'type' ], expected
= True )
40 bucket
= info
[ 'jsplayer' ][ 'video_bucket' ]
41 ext
= info
[ 'jsplayer' ][ 'video_extension' ]
42 video_url
= compat_urlparse
. urljoin ( bucket
, doc
+ '-SD.' + ext
)
43 description
= self
._ html
_ search
_ regex
(
44 r
'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>' , webpage
,
45 'description' , fatal
= False )
49 'id' : info
[ 'slideshow' ][ 'id' ],
50 'title' : info
[ 'slideshow' ][ 'title' ],
53 'thumbnail' : info
[ 'slideshow' ][ 'pin_image_url' ],
54 'description' : description
,