]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/elpais.py
00a69e6312aede6069e062c6abff29137939daa9
2 from __future__
import unicode_literals
4 from . common
import InfoExtractor
5 from .. utils
import unified_strdate
8 class ElPaisIE ( InfoExtractor
):
9 _VALID_URL
= r
'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
13 'url' : 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html' ,
14 'md5' : '98406f301f19562170ec071b83433d55' ,
16 'id' : 'tiempo-nuevo-recetas-viejas' ,
18 'title' : 'Tiempo nuevo, recetas viejas' ,
19 'description' : 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.' ,
20 'upload_date' : '20140206' ,
24 def _real_extract ( self
, url
):
25 video_id
= self
._ match
_ id
( url
)
26 webpage
= self
._ download
_ webpage
( url
, video_id
)
28 prefix
= self
._ html
_ search
_ regex
(
29 r
'var url_cache = "([^"]+)";' , webpage
, 'URL prefix' )
30 video_suffix
= self
._ search
_ regex
(
31 r
"URLMediaFile = url_cache \+ '([^']+)'" , webpage
, 'video URL' )
32 video_url
= prefix
+ video_suffix
33 thumbnail_suffix
= self
._ search
_ regex
(
34 r
"URLMediaStill = url_cache \+ '([^']+)'" , webpage
, 'thumbnail URL' ,
37 None if thumbnail_suffix
is None
38 else prefix
+ thumbnail_suffix
)
39 title
= self
._ html
_ search
_ regex
(
40 '<h2 class="entry-header entry-title.*?>(.*?)</h2>' ,
42 date_str
= self
._ search
_ regex
(
43 r
'<p class="date-header date-int updated"\s+title="([^"]+)">' ,
44 webpage
, 'upload date' , fatal
= False )
45 upload_date
= ( None if date_str
is None else unified_strdate ( date_str
))
51 'description' : self
._ og
_ search
_ description
( webpage
),
52 'thumbnail' : thumbnail
,
53 'upload_date' : upload_date
,