]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zdf.py 
   2  from  __future__ 
import  unicode_literals
   7  from  . common 
import  InfoExtractor
  20  class  ZDFIE ( InfoExtractor
):   21      _VALID_URL 
=  r
'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'   24          'url' :  'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt' ,   28              'title' :  'ZDFspezial - Ende des Machtpokers' ,   29              'description' :  'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".' ,   31              'uploader' :  'spezial' ,   32              'uploader_id' :  '225948' ,   33              'upload_date' :  '20131127' ,   35          'skip' :  'Videos on ZDF.de are depublicised in short order' ,   38      def  _parse_smil_formats ( self
,  smil
,  smil_url
,  video_id
,  namespace
= None ,  f4m_params
= None ,  transform_rtmp_url
= None ):   40          for  param_group 
in  smil
. findall ( self
._ xpath
_ ns
( './head/paramGroup' ,  namespace
)):   41              group_id 
=  param_group
. attrib
. get ( self
._ xpath
_ ns
( 'id' ,  'http://www.w3.org/XML/1998/namespace' ))   43              for  param 
in  param_group
:   44                  params
[ param
. get ( 'name' )] =  param
. get ( 'value' )   45              param_groups
[ group_id
] =  params
  48          for  video 
in  smil
. findall ( self
._ xpath
_ ns
( './/video' ,  namespace
)):   49              src 
=  video
. get ( 'src' )   52              bitrate 
=  float_or_none ( video
. get ( 'system-bitrate' )  or  video
. get ( 'systemBitrate' ),  1000 )   53              group_id 
=  video
. get ( 'paramGroup' )   54              param_group 
=  param_groups
[ group_id
]   55              for  proto 
in  param_group
[ 'protocols' ]. split ( ',' ):   57                      'url' :  ' %s :// %s '  % ( proto
,  param_group
[ 'host' ]),   58                      'app' :  param_group
[ 'app' ],   61                      'format_id' :  ' %s-%d '  % ( proto
,  bitrate
),   64          self
._ sort
_ formats
( formats
)   67      def  extract_from_xml_url ( self
,  video_id
,  xml_url
):   68          doc 
=  self
._ download
_ xml
(   70              note
= 'Downloading video info' ,   71              errnote
= 'Failed to download video info' )   73          status_code 
=  doc
. find ( './status/statuscode' )   74          if  status_code 
is not None and  status_code
. text 
!=  'ok' :   75              code 
=  status_code
. text
  76              if  code 
==  'notVisibleAnymore' :   77                  message 
=  'Video  %s  is not available'  %  video_id
  79                  message 
=  ' %s  returned error:  %s '  % ( self
. IE_NAME
,  code
)   80              raise  ExtractorError ( message
,  expected
= True )   82          title 
=  doc
. find ( './/information/title' ). text
  83          description 
=  xpath_text ( doc
,  './/information/detail' ,  'description' )   84          duration 
=  int_or_none ( xpath_text ( doc
,  './/details/lengthSec' ,  'duration' ))   85          uploader 
=  xpath_text ( doc
,  './/details/originChannelTitle' ,  'uploader' )   86          uploader_id 
=  xpath_text ( doc
,  './/details/originChannelId' ,  'uploader id' )   87          upload_date 
=  unified_strdate ( xpath_text ( doc
,  './/details/airtime' ,  'upload date' ))   89          def  xml_to_thumbnails ( fnode
):   92                  thumbnail_url 
=  node
. text
  98                  if  'key'  in  node
. attrib
:   99                      m 
=  re
. match ( '^([0-9]+)x([0-9]+)$' ,  node
. attrib
[ 'key' ])  101                          thumbnail
[ 'width' ] =  int ( m
. group ( 1 ))  102                          thumbnail
[ 'height' ] =  int ( m
. group ( 2 ))  103                  thumbnails
. append ( thumbnail
)  106          thumbnails 
=  xml_to_thumbnails ( doc
. findall ( './/teaserimages/teaserimage' ))  108          format_nodes 
=  doc
. findall ( './/formitaeten/formitaet' )  109          quality 
=  qualities ([ 'veryhigh' ,  'high' ,  'med' ,  'low' ])  111          def  get_quality ( elem
):  112              return  quality ( xpath_text ( elem
,  'quality' ))  113          format_nodes
. sort ( key
= get_quality
)  116          for  fnode 
in  format_nodes
:  117              video_url 
=  fnode
. find ( 'url' ). text
 118              is_available 
=  'http://www.metafilegenerator'  not in  video_url
 121              format_id 
=  fnode
. attrib
[ 'basetype' ]  122              quality 
=  xpath_text ( fnode
,  './quality' ,  'quality' )  123              format_m 
=  re
. match ( r
'''(?x)  124                  (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_  125                  (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)  128              ext 
=  determine_ext ( video_url
,  None )  or  format_m
. group ( 'container' )  129              if  ext 
not in  ( 'smil' ,  'f4m' ,  'm3u8' ):  130                  format_id 
=  format_id 
+  '-'  +  quality
 131              if  format_id 
in  format_ids
:  137                  formats
. extend ( self
._ extract
_ smil
_ formats
(  138                      video_url
,  video_id
,  fatal
= False ))  140                  formats
. extend ( self
._ extract
_ m
3u8_ formats
(  141                      video_url
,  video_id
,  'mp4' ,  m3u8_id
= format_id
,  fatal
= False ))  143                  formats
. extend ( self
._ extract
_ f
4 m
_ formats
(  144                      video_url
,  video_id
,  f4m_id
= format_id
,  fatal
= False ))  146                  proto 
=  format_m
. group ( 'proto' ). lower ()  148                  abr 
=  int_or_none ( xpath_text ( fnode
,  './audioBitrate' ,  'abr' ),  1000 )  149                  vbr 
=  int_or_none ( xpath_text ( fnode
,  './videoBitrate' ,  'vbr' ),  1000 )  151                  width 
=  int_or_none ( xpath_text ( fnode
,  './width' ,  'width' ))  152                  height 
=  int_or_none ( xpath_text ( fnode
,  './height' ,  'height' ))  154                  filesize 
=  int_or_none ( xpath_text ( fnode
,  './filesize' ,  'filesize' ))  161                      'format_id' :  format_id
,  164                      'acodec' :  format_m
. group ( 'acodec' ),  165                      'vcodec' :  format_m
. group ( 'vcodec' ),  170                      'filesize' :  filesize
,  171                      'format_note' :  format_note
,  173                      '_available' :  is_available
,  175              format_ids
. append ( format_id
)  177          self
._ sort
_ formats
( formats
)  182              'description' :  description
,  183              'duration' :  duration
,  184              'thumbnails' :  thumbnails
,  185              'uploader' :  uploader
,  186              'uploader_id' :  uploader_id
,  187              'upload_date' :  upload_date
,  191      def  _real_extract ( self
,  url
):  192          video_id 
=  self
._ match
_ id
( url
)  193          xml_url 
=  'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id= %s '  %  video_id
 194          return  self
. extract_from_xml_url ( video_id
,  xml_url
)  197  class  ZDFChannelIE ( InfoExtractor
):  198      _VALID_URL 
=  r
'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)'  200          'url' :  'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic' ,  206          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332' ,  207          'only_matching' :  True ,  209          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332' ,  210          'only_matching' :  True ,  212          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off' ,  213          'only_matching' :  True ,  217      def  _fetch_page ( self
,  channel_id
,  page
):  218          offset 
=  page 
*  self
._ PAGE
_ SIZE
 220              'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset= %d &maxLength= %d &id= %s '  221              % ( offset
,  self
._ PAGE
_ SIZE
,  channel_id
))  222          doc 
=  self
._ download
_ xml
(  224              note
= 'Downloading channel info' ,  225              errnote
= 'Failed to download channel info' )  227          title 
=  doc
. find ( './/information/title' ). text
 228          description 
=  doc
. find ( './/information/detail' ). text
 229          for  asset 
in  doc
. findall ( './/teasers/teaser' ):  230              a_type 
=  asset
. find ( './type' ). text
 231              a_id 
=  asset
. find ( './details/assetId' ). text
 232              if  a_type 
not in  ( 'video' ,  'topic' ):  236                  'playlist_title' :  title
,  237                  'playlist_description' :  description
,  238                  'url' :  'zdf: %s : %s '  % ( a_type
,  a_id
),  241      def  _real_extract ( self
,  url
):  242          channel_id 
=  self
._ match
_ id
( url
)  243          entries 
=  OnDemandPagedList (  244              functools
. partial ( self
._ fetch
_ page
,  channel_id
),  self
._ PAGE
_ SIZE
)