]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/heise.py
2 from __future__
import unicode_literals
4 from . common
import InfoExtractor
12 class HeiseIE ( InfoExtractor
):
14 https?://(?:www\.)?heise\.de/video/artikel/
15 .+?(?P<id>[0-9]+)\.html(?:$|[?#])
19 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
21 'md5' : 'ffed432483e922e88545ad9f2f15d30e' ,
26 "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
28 'format_id' : 'mp4_720p' ,
29 'timestamp' : 1411812600 ,
30 'upload_date' : '20140927' ,
31 'description' : 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.' ,
32 'thumbnail' : r
're:^https?://.*\.jpe?g$' ,
36 def _real_extract ( self
, url
):
37 video_id
= self
._ match
_ id
( url
)
38 webpage
= self
._ download
_ webpage
( url
, video_id
)
40 container_id
= self
._ search
_ regex
(
41 r
'<div class="videoplayerjw".*?data-container="([0-9]+)"' ,
42 webpage
, 'container ID' )
43 sequenz_id
= self
._ search
_ regex
(
44 r
'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"' ,
45 webpage
, 'sequenz ID' )
46 data_url
= 'http://www.heise.de/videout/feed?container= %s &sequenz= %s ' % ( container_id
, sequenz_id
)
47 doc
= self
._ download
_ xml
( data_url
, video_id
)
51 'thumbnail' : self
._ og
_ search
_ thumbnail
( webpage
),
52 'timestamp' : parse_iso8601 (
53 self
._ html
_ search
_ meta
( 'date' , webpage
)),
54 'description' : self
._ og
_ search
_ description
( webpage
),
57 title
= self
._ html
_ search
_ meta
( 'fulltitle' , webpage
)
61 info
[ 'title' ] = self
._ og
_ search
_ title
( webpage
)
64 for source_node
in doc
. findall ( './/{http://rss.jwpcdn.com/}source' ):
65 label
= source_node
. attrib
[ 'label' ]
66 height
= int_or_none ( self
._ search
_ regex
(
67 r
'^(.*?_)?([0-9]+)p$' , label
, 'height' , default
= None ))
68 video_url
= source_node
. attrib
[ 'file' ]
69 ext
= determine_ext ( video_url
, '' )
73 'format_id' : ' %s _ %s ' % ( ext
, label
),
76 self
._ sort
_ formats
( formats
)
77 info
[ 'formats' ] = formats