]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zdf.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   7  from  . common 
import  InfoExtractor
 
  20  class  ZDFIE ( InfoExtractor
):  
  21      _VALID_URL 
=  r
'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'  
  24          'url' :  'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt' ,  
  28              'title' :  'ZDFspezial - Ende des Machtpokers' ,  
  29              'description' :  'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".' ,  
  31              'uploader' :  'spezial' ,  
  32              'uploader_id' :  '225948' ,  
  33              'upload_date' :  '20131127' ,  
  35          'skip' :  'Videos on ZDF.de are depublicised in short order' ,  
  38      def  _parse_smil_formats ( self
,  smil
,  smil_url
,  video_id
,  namespace
= None ,  f4m_params
= None ,  transform_rtmp_url
= None ):  
  40          for  param_group 
in  smil
. findall ( self
._ xpath
_ ns
( './head/paramGroup' ,  namespace
)):  
  41              group_id 
=  param_group
. attrib
. get ( self
._ xpath
_ ns
( 'id' ,  'http://www.w3.org/XML/1998/namespace' ))  
  43              for  param 
in  param_group
:  
  44                  params
[ param
. get ( 'name' )] =  param
. get ( 'value' )  
  45              param_groups
[ group_id
] =  params
 
  48          for  video 
in  smil
. findall ( self
._ xpath
_ ns
( './/video' ,  namespace
)):  
  49              src 
=  video
. get ( 'src' )  
  52              bitrate 
=  float_or_none ( video
. get ( 'system-bitrate' )  or  video
. get ( 'systemBitrate' ),  1000 )  
  53              group_id 
=  video
. get ( 'paramGroup' )  
  54              param_group 
=  param_groups
[ group_id
]  
  55              for  proto 
in  param_group
[ 'protocols' ]. split ( ',' ):  
  57                      'url' :  ' %s :// %s '  % ( proto
,  param_group
[ 'host' ]),  
  58                      'app' :  param_group
[ 'app' ],  
  61                      'format_id' :  ' %s-%d '  % ( proto
,  bitrate
),  
  64          self
._ sort
_ formats
( formats
)  
  67      def  extract_from_xml_url ( self
,  video_id
,  xml_url
):  
  68          doc 
=  self
._ download
_ xml
(  
  70              note
= 'Downloading video info' ,  
  71              errnote
= 'Failed to download video info' )  
  73          status_code 
=  doc
. find ( './status/statuscode' )  
  74          if  status_code 
is not None and  status_code
. text 
!=  'ok' :  
  75              code 
=  status_code
. text
 
  76              if  code 
==  'notVisibleAnymore' :  
  77                  message 
=  'Video  %s  is not available'  %  video_id
 
  79                  message 
=  ' %s  returned error:  %s '  % ( self
. IE_NAME
,  code
)  
  80              raise  ExtractorError ( message
,  expected
= True )  
  82          title 
=  doc
. find ( './/information/title' ). text
 
  83          description 
=  xpath_text ( doc
,  './/information/detail' ,  'description' )  
  84          duration 
=  int_or_none ( xpath_text ( doc
,  './/details/lengthSec' ,  'duration' ))  
  85          uploader 
=  xpath_text ( doc
,  './/details/originChannelTitle' ,  'uploader' )  
  86          uploader_id 
=  xpath_text ( doc
,  './/details/originChannelId' ,  'uploader id' )  
  87          upload_date 
=  unified_strdate ( xpath_text ( doc
,  './/details/airtime' ,  'upload date' ))  
  89          def  xml_to_thumbnails ( fnode
):  
  92                  thumbnail_url 
=  node
. text
 
  98                  if  'key'  in  node
. attrib
:  
  99                      m 
=  re
. match ( '^([0-9]+)x([0-9]+)$' ,  node
. attrib
[ 'key' ])  
 101                          thumbnail
[ 'width' ] =  int ( m
. group ( 1 ))  
 102                          thumbnail
[ 'height' ] =  int ( m
. group ( 2 ))  
 103                  thumbnails
. append ( thumbnail
)  
 106          thumbnails 
=  xml_to_thumbnails ( doc
. findall ( './/teaserimages/teaserimage' ))  
 108          format_nodes 
=  doc
. findall ( './/formitaeten/formitaet' )  
 109          quality 
=  qualities ([ 'veryhigh' ,  'high' ,  'med' ,  'low' ])  
 111          def  get_quality ( elem
):  
 112              return  quality ( xpath_text ( elem
,  'quality' ))  
 113          format_nodes
. sort ( key
= get_quality
)  
 116          for  fnode 
in  format_nodes
:  
 117              video_url 
=  fnode
. find ( 'url' ). text
 
 118              is_available 
=  'http://www.metafilegenerator'  not in  video_url
 
 121              format_id 
=  fnode
. attrib
[ 'basetype' ]  
 122              quality 
=  xpath_text ( fnode
,  './quality' ,  'quality' )  
 123              format_m 
=  re
. match ( r
'''(?x)  
 124                  (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_  
 125                  (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)  
 128              ext 
=  determine_ext ( video_url
,  None )  or  format_m
. group ( 'container' )  
 129              if  ext 
not in  ( 'smil' ,  'f4m' ,  'm3u8' ):  
 130                  format_id 
=  format_id 
+  '-'  +  quality
 
 131              if  format_id 
in  format_ids
:  
 137                  formats
. extend ( self
._ extract
_ smil
_ formats
(  
 138                      video_url
,  video_id
,  fatal
= False ))  
 140                  formats
. extend ( self
._ extract
_ m
3u8_ formats
(  
 141                      video_url
,  video_id
,  'mp4' ,  m3u8_id
= format_id
,  fatal
= False ))  
 143                  formats
. extend ( self
._ extract
_ f
4 m
_ formats
(  
 144                      video_url
,  video_id
,  f4m_id
= format_id
,  fatal
= False ))  
 146                  proto 
=  format_m
. group ( 'proto' ). lower ()  
 148                  abr 
=  int_or_none ( xpath_text ( fnode
,  './audioBitrate' ,  'abr' ),  1000 )  
 149                  vbr 
=  int_or_none ( xpath_text ( fnode
,  './videoBitrate' ,  'vbr' ),  1000 )  
 151                  width 
=  int_or_none ( xpath_text ( fnode
,  './width' ,  'width' ))  
 152                  height 
=  int_or_none ( xpath_text ( fnode
,  './height' ,  'height' ))  
 154                  filesize 
=  int_or_none ( xpath_text ( fnode
,  './filesize' ,  'filesize' ))  
 161                      'format_id' :  format_id
,  
 164                      'acodec' :  format_m
. group ( 'acodec' ),  
 165                      'vcodec' :  format_m
. group ( 'vcodec' ),  
 170                      'filesize' :  filesize
,  
 171                      'format_note' :  format_note
,  
 173                      '_available' :  is_available
,  
 175              format_ids
. append ( format_id
)  
 177          self
._ sort
_ formats
( formats
)  
 182              'description' :  description
,  
 183              'duration' :  duration
,  
 184              'thumbnails' :  thumbnails
,  
 185              'uploader' :  uploader
,  
 186              'uploader_id' :  uploader_id
,  
 187              'upload_date' :  upload_date
,  
 191      def  _real_extract ( self
,  url
):  
 192          video_id 
=  self
._ match
_ id
( url
)  
 193          xml_url 
=  'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id= %s '  %  video_id
 
 194          return  self
. extract_from_xml_url ( video_id
,  xml_url
)  
 197  class  ZDFChannelIE ( InfoExtractor
):  
 198      _VALID_URL 
=  r
'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)'  
 200          'url' :  'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic' ,  
 206          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332' ,  
 207          'only_matching' :  True ,  
 209          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332' ,  
 210          'only_matching' :  True ,  
 212          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off' ,  
 213          'only_matching' :  True ,  
 217      def  _fetch_page ( self
,  channel_id
,  page
):  
 218          offset 
=  page 
*  self
._ PAGE
_ SIZE
 
 220              'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset= %d &maxLength= %d &id= %s '  
 221              % ( offset
,  self
._ PAGE
_ SIZE
,  channel_id
))  
 222          doc 
=  self
._ download
_ xml
(  
 224              note
= 'Downloading channel info' ,  
 225              errnote
= 'Failed to download channel info' )  
 227          title 
=  doc
. find ( './/information/title' ). text
 
 228          description 
=  doc
. find ( './/information/detail' ). text
 
 229          for  asset 
in  doc
. findall ( './/teasers/teaser' ):  
 230              a_type 
=  asset
. find ( './type' ). text
 
 231              a_id 
=  asset
. find ( './details/assetId' ). text
 
 232              if  a_type 
not in  ( 'video' ,  'topic' ):  
 236                  'playlist_title' :  title
,  
 237                  'playlist_description' :  description
,  
 238                  'url' :  'zdf: %s : %s '  % ( a_type
,  a_id
),  
 241      def  _real_extract ( self
,  url
):  
 242          channel_id 
=  self
._ match
_ id
( url
)  
 243          entries 
=  OnDemandPagedList (  
 244              functools
. partial ( self
._ fetch
_ page
,  channel_id
),  self
._ PAGE
_ SIZE
)