]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/slideshare.py
1 from __future__
import unicode_literals
6 from . common
import InfoExtractor
16 class SlideshareIE ( InfoExtractor
):
17 _VALID_URL
= r
'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
20 'url' : 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity' ,
24 'title' : 'Managing Scale and Complexity' ,
25 'description' : 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.' ,
29 def _real_extract ( self
, url
):
30 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
31 page_title
= mobj
. group ( 'title' )
32 webpage
= self
._ download
_ webpage
( url
, page_title
)
33 slideshare_obj
= self
._ search
_ regex
(
34 r
'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);' ,
35 webpage
, 'slideshare object' )
36 info
= json
. loads ( slideshare_obj
)
37 if info
[ 'slideshow' ][ 'type' ] != 'video' :
38 raise ExtractorError ( 'Webpage type is " %s ": only video extraction is supported for Slideshare' % info
[ 'slideshow' ][ 'type' ], expected
= True )
41 bucket
= info
[ 'jsplayer' ][ 'video_bucket' ]
42 ext
= info
[ 'jsplayer' ][ 'video_extension' ]
43 video_url
= compat_urlparse
. urljoin ( bucket
, doc
+ '-SD.' + ext
)
44 description
= get_element_by_id ( 'slideshow-description-paragraph' , webpage
) or self
._ html
_ search
_ regex
(
45 r
'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>' , webpage
,
46 'description' , fatal
= False )
50 'id' : info
[ 'slideshow' ][ 'id' ],
51 'title' : info
[ 'slideshow' ][ 'title' ],
54 'thumbnail' : info
[ 'slideshow' ][ 'pin_image_url' ],
55 'description' : description
. strip () if description
else None ,