]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/heise.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   4  from  . common 
import  InfoExtractor
 
  12  class  HeiseIE ( InfoExtractor
):  
  14          https?://(?:www\.)?heise\.de/video/artikel/  
  15          .+?(?P<id>[0-9]+)\.html(?:$|[?#])  
  19              'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'  
  21          'md5' :  'ffed432483e922e88545ad9f2f15d30e' ,  
  26                  "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"  
  28              'format_id' :  'mp4_720p' ,  
  29              'timestamp' :  1411812600 ,  
  30              'upload_date' :  '20140927' ,  
  31              'description' :  'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.' ,  
  32              'thumbnail' :  're:^https?://.*\.jpe?g$' ,  
  36      def  _real_extract ( self
,  url
):  
  37          video_id 
=  self
._ match
_ id
( url
)  
  38          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  
  40          container_id 
=  self
._ search
_ regex
(  
  41              r
'<div class="videoplayerjw".*?data-container="([0-9]+)"' ,  
  42              webpage
,  'container ID' )  
  43          sequenz_id 
=  self
._ search
_ regex
(  
  44              r
'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"' ,  
  45              webpage
,  'sequenz ID' )  
  46          data_url 
=  'http://www.heise.de/videout/feed?container= %s &sequenz= %s '  % ( container_id
,  sequenz_id
)  
  47          doc 
=  self
._ download
_ xml
( data_url
,  video_id
)  
  51              'thumbnail' :  self
._ og
_ search
_ thumbnail
( webpage
),  
  52              'timestamp' :  parse_iso8601 (  
  53                  self
._ html
_ search
_ meta
( 'date' ,  webpage
)),  
  54              'description' :  self
._ og
_ search
_ description
( webpage
),  
  57          title 
=  self
._ html
_ search
_ meta
( 'fulltitle' ,  webpage
)  
  61              info
[ 'title' ] =  self
._ og
_ search
_ title
( webpage
)  
  64          for  source_node 
in  doc
. findall ( './/{http://rss.jwpcdn.com/}source' ):  
  65              label 
=  source_node
. attrib
[ 'label' ]  
  66              height 
=  int_or_none ( self
._ search
_ regex
(  
  67                  r
'^(.*?_)?([0-9]+)p$' ,  label
,  'height' ,  default
= None ))  
  68              video_url 
=  source_node
. attrib
[ 'file' ]  
  69              ext 
=  determine_ext ( video_url
,  '' )  
  73                  'format_id' :  ' %s _ %s '  % ( ext
,  label
),  
  76          self
._ sort
_ formats
( formats
)  
  77          info
[ 'formats' ] =  formats