]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zdf.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   7  from  . common 
import  InfoExtractor
 
  16  def  extract_from_xml_url ( ie
,  video_id
,  xml_url
):  
  17      doc 
=  ie
._ download
_ xml
(  
  19          note
= 'Downloading video info' ,  
  20          errnote
= 'Failed to download video info' )  
  22      title 
=  doc
. find ( './/information/title' ). text
 
  23      description 
=  xpath_text ( doc
,  './/information/detail' ,  'description' )  
  24      duration 
=  int_or_none ( xpath_text ( doc
,  './/details/lengthSec' ,  'duration' ))  
  25      uploader 
=  xpath_text ( doc
,  './/details/originChannelTitle' ,  'uploader' )  
  26      uploader_id 
=  xpath_text ( doc
,  './/details/originChannelId' ,  'uploader id' )  
  27      upload_date 
=  unified_strdate ( xpath_text ( doc
,  './/details/airtime' ,  'upload date' ))  
  29      def  xml_to_format ( fnode
):  
  30          video_url 
=  fnode
. find ( 'url' ). text
 
  31          is_available 
=  'http://www.metafilegenerator'  not in  video_url
 
  33          format_id 
=  fnode
. attrib
[ 'basetype' ]  
  34          format_m 
=  re
. match ( r
'''(?x)  
  35              (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_  
  36              (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)  
  39          ext 
=  format_m
. group ( 'container' )  
  40          proto 
=  format_m
. group ( 'proto' ). lower ()  
  42          quality 
=  xpath_text ( fnode
,  './quality' ,  'quality' )  
  43          abr 
=  int_or_none ( xpath_text ( fnode
,  './audioBitrate' ,  'abr' ),  1000 )  
  44          vbr 
=  int_or_none ( xpath_text ( fnode
,  './videoBitrate' ,  'vbr' ),  1000 )  
  46          width 
=  int_or_none ( xpath_text ( fnode
,  './width' ,  'width' ))  
  47          height 
=  int_or_none ( xpath_text ( fnode
,  './height' ,  'height' ))  
  49          filesize 
=  int_or_none ( xpath_text ( fnode
,  './filesize' ,  'filesize' ))  
  56              'format_id' :  format_id 
+  '-'  +  quality
,  
  59              'acodec' :  format_m
. group ( 'acodec' ),  
  60              'vcodec' :  format_m
. group ( 'vcodec' ),  
  66              'format_note' :  format_note
,  
  68              '_available' :  is_available
,  
  71      def  xml_to_thumbnails ( fnode
):  
  74              thumbnail_url 
=  node
. text
 
  80              if  'key'  in  node
. attrib
:  
  81                  m 
=  re
. match ( '^([0-9]+)x([0-9]+)$' ,  node
. attrib
[ 'key' ])  
  83                      thumbnail
[ 'width' ] =  int ( m
. group ( 1 ))  
  84                      thumbnail
[ 'height' ] =  int ( m
. group ( 2 ))  
  85              thumbnails
. append ( thumbnail
)  
  88      thumbnails 
=  xml_to_thumbnails ( doc
. findall ( './/teaserimages/teaserimage' ))  
  90      format_nodes 
=  doc
. findall ( './/formitaeten/formitaet' )  
  91      formats 
=  list ( filter (  
  92          lambda  f
:  f
[ '_available' ],  
  93          map ( xml_to_format
,  format_nodes
)))  
  94      ie
._ sort
_ formats
( formats
)  
  99          'description' :  description
,  
 100          'duration' :  duration
,  
 101          'thumbnails' :  thumbnails
,  
 102          'uploader' :  uploader
,  
 103          'uploader_id' :  uploader_id
,  
 104          'upload_date' :  upload_date
,  
 109  class  ZDFIE ( InfoExtractor
):  
 110      _VALID_URL 
=  r
'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'  
 113          'url' :  'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt' ,  
 117              'title' :  'ZDFspezial - Ende des Machtpokers' ,  
 118              'description' :  'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".' ,  
 120              'uploader' :  'spezial' ,  
 121              'uploader_id' :  '225948' ,  
 122              'upload_date' :  '20131127' ,  
 124          'skip' :  'Videos on ZDF.de are depublicised in short order' ,  
 127      def  _real_extract ( self
,  url
):  
 128          video_id 
=  self
._ match
_ id
( url
)  
 129          xml_url 
=  'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id= %s '  %  video_id
 
 130          return  extract_from_xml_url ( self
,  video_id
,  xml_url
)  
 133  class  ZDFChannelIE ( InfoExtractor
):  
 134      _VALID_URL 
=  r
'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'  
 136          'url' :  'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic' ,  
 144      def  _fetch_page ( self
,  channel_id
,  page
):  
 145          offset 
=  page 
*  self
._ PAGE
_ SIZE
 
 147              'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset= %d &maxLength= %d &id= %s '  
 148              % ( offset
,  self
._ PAGE
_ SIZE
,  channel_id
))  
 149          doc 
=  self
._ download
_ xml
(  
 151              note
= 'Downloading channel info' ,  
 152              errnote
= 'Failed to download channel info' )  
 154          title 
=  doc
. find ( './/information/title' ). text
 
 155          description 
=  doc
. find ( './/information/detail' ). text
 
 156          for  asset 
in  doc
. findall ( './/teasers/teaser' ):  
 157              a_type 
=  asset
. find ( './type' ). text
 
 158              a_id 
=  asset
. find ( './details/assetId' ). text
 
 159              if  a_type 
not in  ( 'video' ,  'topic' ):  
 163                  'playlist_title' :  title
,  
 164                  'playlist_description' :  description
,  
 165                  'url' :  'zdf: %s : %s '  % ( a_type
,  a_id
),  
 168      def  _real_extract ( self
,  url
):  
 169          channel_id 
=  self
._ match
_ id
( url
)  
 170          entries 
=  OnDemandPagedList (  
 171              functools
. partial ( self
._ fetch
_ page
,  channel_id
),  self
._ PAGE
_ SIZE
)