]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/escapist.py
1 from __future__
import unicode_literals
3 from . common
import InfoExtractor
15 class EscapistIE ( InfoExtractor
):
16 _VALID_URL
= r
'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
17 _USER_AGENT
= 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
19 'url' : 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate' ,
20 'md5' : 'ab3a706c681efca53f0a35f1415cf0d1' ,
24 'description' : "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition." ,
25 'uploader_id' : 'the-escapist-presents' ,
26 'uploader' : 'The Escapist Presents' ,
27 'title' : "Breaking Down Baldur's Gate" ,
28 'thumbnail' : 're:^https?://.*\.jpg$' ,
33 def _real_extract ( self
, url
):
34 video_id
= self
._ match
_ id
( url
)
35 webpage_req
= compat_urllib_request
. Request ( url
)
36 webpage_req
. add_header ( 'User-Agent' , self
._U SER
_ AGENT
)
37 webpage
= self
._ download
_ webpage
( webpage_req
, video_id
)
39 uploader_id
= self
._ html
_ search
_ regex
(
40 r
"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'" ,
41 webpage
, 'uploader ID' , fatal
= False )
42 uploader
= self
._ html
_ search
_ regex
(
43 r
"<h1\s+class='headline'>(.*?)</a>" ,
44 webpage
, 'uploader' , fatal
= False )
45 description
= self
._ html
_ search
_ meta
( 'description' , webpage
)
46 duration
= parse_duration ( self
._ html
_ search
_ meta
( 'duration' , webpage
))
48 raw_title
= self
._ html
_ search
_ meta
( 'title' , webpage
, fatal
= True )
49 title
= raw_title
. partition ( ' : ' )[ 2 ]
51 config_url
= compat_urllib_parse
. unquote ( self
._ html
_ search
_ regex
(
54 <param\s+name="flashvars".*?\s+value="config=|
55 flashvars="config=
59 webpage
, 'config URL' ))
64 def _add_format ( name
, cfg_url
, quality
):
65 cfg_req
= compat_urllib_request
. Request ( cfg_url
)
66 cfg_req
. add_header ( 'User-Agent' , self
._U SER
_ AGENT
)
67 config
= self
._ download
_ json
(
69 'Downloading ' + name
+ ' configuration' ,
70 'Unable to download ' + name
+ ' configuration' ,
71 transform_source
= js_to_json
)
73 playlist
= config
[ 'playlist' ]
75 if p
. get ( 'eventCategory' ) == 'Video' :
77 elif p
. get ( 'eventCategory' ) == 'Video Postroll' :
87 'User-Agent' : self
._U SER
_ AGENT
,
91 _add_format ( 'normal' , config_url
, quality
= 0 )
92 hq_url
= ( config_url
+
93 ( '&hq=1' if '?' in config_url
else config_url
+ '?hq=1' ))
95 _add_format ( 'hq' , hq_url
, quality
= 1 )
96 except ExtractorError
:
97 pass # That's fine, we'll just use normal quality
98 self
._ sort
_ formats
( formats
)
100 if '/escapist/sales-marketing/' in formats
[- 1 ][ 'url' ]:
101 raise ExtractorError ( 'This IP address has been blocked by The Escapist' , expected
= True )
106 'uploader' : uploader
,
107 'uploader_id' : uploader_id
,
109 'thumbnail' : self
._ og
_ search
_ thumbnail
( webpage
),
110 'description' : description
,
111 'duration' : duration
,
114 if self
._ downloader
. params
. get ( 'include_ads' ) and ad_formats
:
115 self
._ sort
_ formats
( ad_formats
)
117 'id' : ' %s- ad' % video_id
,
118 'title' : ' %s (Postroll)' % title
,
119 'formats' : ad_formats
,
123 'entries' : [ res
, ad_res
],