]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/escapist.py
e47f3e27a57aa14e3eee526af8998230b524bb4f
   1  from  __future__ 
import  unicode_literals
   3  from  . common 
import  InfoExtractor
  15  class  EscapistIE ( InfoExtractor
):   16      _VALID_URL 
=  r
'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'   17      _USER_AGENT 
=  'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'   19          'url' :  'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate' ,   20          'md5' :  'ab3a706c681efca53f0a35f1415cf0d1' ,   24              'description' :  "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition." ,   25              'uploader_id' :  'the-escapist-presents' ,   26              'uploader' :  'The Escapist Presents' ,   27              'title' :  "Breaking Down Baldur's Gate" ,   28              'thumbnail' :  're:^https?://.*\.jpg$' ,   33      def  _real_extract ( self
,  url
):   34          video_id 
=  self
._ match
_ id
( url
)   35          webpage_req 
=  compat_urllib_request
. Request ( url
)   36          webpage_req
. add_header ( 'User-Agent' ,  self
._U SER
_ AGENT
)   37          webpage 
=  self
._ download
_ webpage
( webpage_req
,  video_id
)   39          uploader_id 
=  self
._ html
_ search
_ regex
(   40              r
"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'" ,   41              webpage
,  'uploader ID' ,  fatal
= False )   42          uploader 
=  self
._ html
_ search
_ regex
(   43              r
"<h1\s+class='headline'>(.*?)</a>" ,   44              webpage
,  'uploader' ,  fatal
= False )   45          description 
=  self
._ html
_ search
_ meta
( 'description' ,  webpage
)   46          duration 
=  parse_duration ( self
._ html
_ search
_ meta
( 'duration' ,  webpage
))   48          raw_title 
=  self
._ html
_ search
_ meta
( 'title' ,  webpage
,  fatal
= True )   49          title 
=  raw_title
. partition ( ' : ' )[ 2 ]   51          config_url 
=  compat_urllib_parse
. unquote ( self
._ html
_ search
_ regex
(   54                  <param\s+name="flashvars".*?\s+value="config=|   55                  flashvars="config=   59              webpage
,  'config URL' ))   64          def  _add_format ( name
,  cfg_url
,  quality
):   65              cfg_req 
=  compat_urllib_request
. Request ( cfg_url
)   66              cfg_req
. add_header ( 'User-Agent' ,  self
._U SER
_ AGENT
)   67              config 
=  self
._ download
_ json
(   69                  'Downloading '  +  name 
+  ' configuration' ,   70                  'Unable to download '  +  name 
+  ' configuration' ,   71                  transform_source
= js_to_json
)   73              playlist 
=  config
[ 'playlist' ]   75                  if  p
. get ( 'eventCategory' ) ==  'Video' :   77                  elif  p
. get ( 'eventCategory' ) ==  'Video Postroll' :   87                          'User-Agent' :  self
._U SER
_ AGENT
,   91          _add_format ( 'normal' ,  config_url
,  quality
= 0 )   92          hq_url 
= ( config_url 
+   93                    ( '&hq=1'  if  '?'  in  config_url 
else  config_url 
+  '?hq=1' ))   95              _add_format ( 'hq' ,  hq_url
,  quality
= 1 )   96          except  ExtractorError
:   97              pass   # That's fine, we'll just use normal quality   98          self
._ sort
_ formats
( formats
)  100          if  '/escapist/sales-marketing/'  in  formats
[- 1 ][ 'url' ]:  101              raise  ExtractorError ( 'This IP address has been blocked by The Escapist' ,  expected
= True )  106              'uploader' :  uploader
,  107              'uploader_id' :  uploader_id
,  109              'thumbnail' :  self
._ og
_ search
_ thumbnail
( webpage
),  110              'description' :  description
,  111              'duration' :  duration
,  114          if  self
._ downloader
. params
. get ( 'include_ads' )  and  ad_formats
:  115              self
._ sort
_ formats
( ad_formats
)  117                  'id' :  ' %s- ad'  %  video_id
,  118                  'title' :  ' %s  (Postroll)'  %  title
,  119                  'formats' :  ad_formats
,  123                  'entries' : [ res
,  ad_res
],