]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/elpais.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   6  from  . common 
import  InfoExtractor
 
   7  from  .. utils 
import  unified_strdate
 
  10  class  ElPaisIE ( InfoExtractor
):  
  11      _VALID_URL 
=  r
'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'  
  15          'url' :  'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html' ,  
  16          'md5' :  '98406f301f19562170ec071b83433d55' ,  
  18              'id' :  'tiempo-nuevo-recetas-viejas' ,  
  20              'title' :  'Tiempo nuevo, recetas viejas' ,  
  21              'description' :  'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.' ,  
  22              'upload_date' :  '20140206' ,  
  26      def  _real_extract ( self
,  url
):  
  27          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
  28          video_id 
=  mobj
. group ( 'id' )  
  30          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  
  32          prefix 
=  self
._ html
_ search
_ regex
(  
  33              r
'var url_cache = "([^"]+)";' ,  webpage
,  'URL prefix' )  
  34          video_suffix 
=  self
._ search
_ regex
(  
  35              r
"URLMediaFile = url_cache \+ '([^']+)'" ,  webpage
,  'video URL' )  
  36          video_url 
=  prefix 
+  video_suffix
 
  37          thumbnail_suffix 
=  self
._ search
_ regex
(  
  38              r
"URLMediaStill = url_cache \+ '([^']+)'" ,  webpage
,  'thumbnail URL' ,  
  41              None if  thumbnail_suffix 
is None  
  42              else  prefix 
+  thumbnail_suffix
)  
  43          title 
=  self
._ html
_ search
_ regex
(  
  44              '<h2 class="entry-header entry-title.*?>(.*?)</h2>' ,  
  46          date_str 
=  self
._ search
_ regex
(  
  47              r
'<p class="date-header date-int updated"\s+title="([^"]+)">' ,  
  48              webpage
,  'upload date' ,  fatal
= False )  
  49          upload_date 
= ( None if  date_str 
is None else  unified_strdate ( date_str
))  
  55              'description' :  self
._ og
_ search
_ description
( webpage
),  
  56              'thumbnail' :  thumbnail
,  
  57              'upload_date' :  upload_date
,