]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/infoq.py
1 from __future__
import unicode_literals
6 from . common
import InfoExtractor
12 class InfoQIE ( InfoExtractor
):
13 _VALID_URL
= r
'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
16 'url' : 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things' ,
17 'md5' : 'b5ca0e0a8c1fed93b0e65e48e462f9a2' ,
19 'id' : '12-jan-pythonthings' ,
21 'description' : 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.' ,
22 'title' : 'A Few of My Favorite [Python] Things' ,
26 def _real_extract ( self
, url
):
27 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
28 video_id
= mobj
. group ( 'id' )
30 webpage
= self
._ download
_ webpage
( url
, video_id
)
32 video_title
= self
._ html
_ search
_ regex
( r
'<title>(.*?)</title>' , webpage
, 'title' )
33 video_description
= self
._ html
_ search
_ meta
( 'description' , webpage
, 'description' )
35 # The server URL is hardcoded
36 video_url
= 'rtmpe://video.infoq.com/cfx/st/'
39 encoded_id
= self
._ search
_ regex
(
40 r
"jsclassref\s*=\s*'([^']*)'" , webpage
, 'encoded id' )
41 real_id
= compat_urllib_parse
. unquote ( base64
. b64decode ( encoded_id
. encode ( 'ascii' )). decode ( 'utf-8' ))
42 playpath
= 'mp4:' + real_id
44 video_filename
= playpath
. split ( '/' )[- 1 ]
45 video_id
, extension
= video_filename
. split ( '.' )
47 http_base
= self
._ search
_ regex
(
48 r
'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)' , webpage
,
55 'play_path' : playpath
,
58 'url' : http_base
+ real_id
,
60 self
._ sort
_ formats
( formats
)
65 'description' : video_description
,