]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/elpais.py
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
7 from .. utils
import unified_strdate
10 class ElPaisIE ( InfoExtractor
):
11 _VALID_URL
= r
'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
15 'url' : 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html' ,
16 'md5' : '98406f301f19562170ec071b83433d55' ,
18 'id' : 'tiempo-nuevo-recetas-viejas' ,
20 'title' : 'Tiempo nuevo, recetas viejas' ,
21 'description' : 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.' ,
22 'upload_date' : '20140206' ,
26 def _real_extract ( self
, url
):
27 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
28 video_id
= mobj
. group ( 'id' )
30 webpage
= self
._ download
_ webpage
( url
, video_id
)
32 prefix
= self
._ html
_ search
_ regex
(
33 r
'var url_cache = "([^"]+)";' , webpage
, 'URL prefix' )
34 video_suffix
= self
._ search
_ regex
(
35 r
"URLMediaFile = url_cache \+ '([^']+)'" , webpage
, 'video URL' )
36 video_url
= prefix
+ video_suffix
37 thumbnail_suffix
= self
._ search
_ regex
(
38 r
"URLMediaStill = url_cache \+ '([^']+)'" , webpage
, 'thumbnail URL' ,
41 None if thumbnail_suffix
is None
42 else prefix
+ thumbnail_suffix
)
43 title
= self
._ html
_ search
_ regex
(
44 '<h2 class="entry-header entry-title.*?>(.*?)</h2>' ,
46 date_str
= self
._ search
_ regex
(
47 r
'<p class="date-header date-int updated"\s+title="([^"]+)">' ,
48 webpage
, 'upload date' , fatal
= False )
49 upload_date
= ( None if date_str
is None else unified_strdate ( date_str
))
55 'description' : self
._ og
_ search
_ description
( webpage
),
56 'thumbnail' : thumbnail
,
57 'upload_date' : upload_date
,