]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/heise.py 
1629cdb8d5a7ca584321474cb160f9907884dd69
   2  from  __future__ 
import  unicode_literals
   4  from  . common 
import  InfoExtractor
  12  class  HeiseIE ( InfoExtractor
):   14          https?://(?:www\.)?heise\.de/video/artikel/   15          .+?(?P<id>[0-9]+)\.html(?:$|[?#])   19              'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'   21          'md5' :  'ffed432483e922e88545ad9f2f15d30e' ,   26                  "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"   28              'format_id' :  'mp4_720p' ,   29              'timestamp' :  1411812600 ,   30              'upload_date' :  '20140927' ,   31              'description' :  'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.' ,   32              'thumbnail' :  r
're:^https?://.*\.jpe?g$' ,   36      def  _real_extract ( self
,  url
):   37          video_id 
=  self
._ match
_ id
( url
)   38          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)   40          container_id 
=  self
._ search
_ regex
(   41              r
'<div class="videoplayerjw".*?data-container="([0-9]+)"' ,   42              webpage
,  'container ID' )   43          sequenz_id 
=  self
._ search
_ regex
(   44              r
'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"' ,   45              webpage
,  'sequenz ID' )   46          data_url 
=  'http://www.heise.de/videout/feed?container= %s &sequenz= %s '  % ( container_id
,  sequenz_id
)   47          doc 
=  self
._ download
_ xml
( data_url
,  video_id
)   51              'thumbnail' :  self
._ og
_ search
_ thumbnail
( webpage
),   52              'timestamp' :  parse_iso8601 (   53                  self
._ html
_ search
_ meta
( 'date' ,  webpage
)),   54              'description' :  self
._ og
_ search
_ description
( webpage
),   57          title 
=  self
._ html
_ search
_ meta
( 'fulltitle' ,  webpage
)   61              info
[ 'title' ] =  self
._ og
_ search
_ title
( webpage
)   64          for  source_node 
in  doc
. findall ( './/{http://rss.jwpcdn.com/}source' ):   65              label 
=  source_node
. attrib
[ 'label' ]   66              height 
=  int_or_none ( self
._ search
_ regex
(   67                  r
'^(.*?_)?([0-9]+)p$' ,  label
,  'height' ,  default
= None ))   68              video_url 
=  source_node
. attrib
[ 'file' ]   69              ext 
=  determine_ext ( video_url
,  '' )   73                  'format_id' :  ' %s _ %s '  % ( ext
,  label
),   76          self
._ sort
_ formats
( formats
)   77          info
[ 'formats' ] =  formats