]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zdf.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   7  from  . common 
import  InfoExtractor
 
  20  class  ZDFIE ( InfoExtractor
):  
  21      _VALID_URL 
=  r
'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'  
  24          'url' :  'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt' ,  
  28              'title' :  'ZDFspezial - Ende des Machtpokers' ,  
  29              'description' :  'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".' ,  
  31              'uploader' :  'spezial' ,  
  32              'uploader_id' :  '225948' ,  
  33              'upload_date' :  '20131127' ,  
  35          'skip' :  'Videos on ZDF.de are depublicised in short order' ,  
  38      def  _parse_smil_formats ( self
,  smil
,  smil_url
,  video_id
,  namespace
= None ,  f4m_params
= None ,  transform_rtmp_url
= None ):  
  40          for  param_group 
in  smil
. findall ( self
._ xpath
_ ns
( './head/paramGroup' ,  namespace
)):  
  41              group_id 
=  param_group
. attrib
. get ( self
._ xpath
_ ns
( 'id' ,  'http://www.w3.org/XML/1998/namespace' ))  
  43              for  param 
in  param_group
:  
  44                  params
[ param
. get ( 'name' )] =  param
. get ( 'value' )  
  45              param_groups
[ group_id
] =  params
 
  48          for  video 
in  smil
. findall ( self
._ xpath
_ ns
( './/video' ,  namespace
)):  
  49              src 
=  video
. get ( 'src' )  
  52              bitrate 
=  float_or_none ( video
. get ( 'system-bitrate' )  or  video
. get ( 'systemBitrate' ),  1000 )  
  53              group_id 
=  video
. get ( 'paramGroup' )  
  54              param_group 
=  param_groups
[ group_id
]  
  55              for  proto 
in  param_group
[ 'protocols' ]. split ( ',' ):  
  57                      'url' :  ' %s :// %s '  % ( proto
,  param_group
[ 'host' ]),  
  58                      'app' :  param_group
[ 'app' ],  
  61                      'format_id' :  ' %s-%d '  % ( proto
,  bitrate
),  
  64          self
._ sort
_ formats
( formats
)  
  67      def  extract_from_xml_url ( self
,  video_id
,  xml_url
):  
  68          doc 
=  self
._ download
_ xml
(  
  70              note
= 'Downloading video info' ,  
  71              errnote
= 'Failed to download video info' )  
  73          status_code 
=  doc
. find ( './status/statuscode' )  
  74          if  status_code 
is not None and  status_code
. text 
!=  'ok' :  
  75              code 
=  status_code
. text
 
  76              if  code 
==  'notVisibleAnymore' :  
  77                  message 
=  'Video  %s  is not available'  %  video_id
 
  79                  message 
=  ' %s  returned error:  %s '  % ( self
. IE_NAME
,  code
)  
  80              raise  ExtractorError ( message
,  expected
= True )  
  82          title 
=  doc
. find ( './/information/title' ). text
 
  83          description 
=  xpath_text ( doc
,  './/information/detail' ,  'description' )  
  84          duration 
=  int_or_none ( xpath_text ( doc
,  './/details/lengthSec' ,  'duration' ))  
  85          uploader 
=  xpath_text ( doc
,  './/details/originChannelTitle' ,  'uploader' )  
  86          uploader_id 
=  xpath_text ( doc
,  './/details/originChannelId' ,  'uploader id' )  
  87          upload_date 
=  unified_strdate ( xpath_text ( doc
,  './/details/airtime' ,  'upload date' ))  
  89          captions_url 
=  doc
. find ( './/caption/url' )  
  90          if  captions_url 
is not None :  
  92                  'url' :  captions_url
. text
,  
  96          def  xml_to_thumbnails ( fnode
):  
  99                  thumbnail_url 
=  node
. text
 
 100                  if not  thumbnail_url
:  
 103                      'url' :  thumbnail_url
,  
 105                  if  'key'  in  node
. attrib
:  
 106                      m 
=  re
. match ( '^([0-9]+)x([0-9]+)$' ,  node
. attrib
[ 'key' ])  
 108                          thumbnail
[ 'width' ] =  int ( m
. group ( 1 ))  
 109                          thumbnail
[ 'height' ] =  int ( m
. group ( 2 ))  
 110                  thumbnails
. append ( thumbnail
)  
 113          thumbnails 
=  xml_to_thumbnails ( doc
. findall ( './/teaserimages/teaserimage' ))  
 115          format_nodes 
=  doc
. findall ( './/formitaeten/formitaet' )  
 116          quality 
=  qualities ([ 'veryhigh' ,  'high' ,  'med' ,  'low' ])  
 118          def  get_quality ( elem
):  
 119              return  quality ( xpath_text ( elem
,  'quality' ))  
 120          format_nodes
. sort ( key
= get_quality
)  
 123          for  fnode 
in  format_nodes
:  
 124              video_url 
=  fnode
. find ( 'url' ). text
 
 125              is_available 
=  'http://www.metafilegenerator'  not in  video_url
 
 128              format_id 
=  fnode
. attrib
[ 'basetype' ]  
 129              quality 
=  xpath_text ( fnode
,  './quality' ,  'quality' )  
 130              format_m 
=  re
. match ( r
'''(?x)  
 131                  (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_  
 132                  (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)  
 135              ext 
=  determine_ext ( video_url
,  None )  or  format_m
. group ( 'container' )  
 136              if  ext 
not in  ( 'smil' ,  'f4m' ,  'm3u8' ):  
 137                  format_id 
=  format_id 
+  '-'  +  quality
 
 138              if  format_id 
in  format_ids
:  
 144                  formats
. extend ( self
._ extract
_ smil
_ formats
(  
 145                      video_url
,  video_id
,  fatal
= False ))  
 147                  # the certificates are misconfigured (see  
 148                  # https://github.com/rg3/youtube-dl/issues/8665)  
 149                  if  video_url
. startswith ( 'https://' ):  
 151                  formats
. extend ( self
._ extract
_ m
3u8_ formats
(  
 152                      video_url
,  video_id
,  'mp4' ,  m3u8_id
= format_id
,  fatal
= False ))  
 154                  formats
. extend ( self
._ extract
_ f
4 m
_ formats
(  
 155                      video_url
,  video_id
,  f4m_id
= format_id
,  fatal
= False ))  
 157                  proto 
=  format_m
. group ( 'proto' ). lower ()  
 159                  abr 
=  int_or_none ( xpath_text ( fnode
,  './audioBitrate' ,  'abr' ),  1000 )  
 160                  vbr 
=  int_or_none ( xpath_text ( fnode
,  './videoBitrate' ,  'vbr' ),  1000 )  
 162                  width 
=  int_or_none ( xpath_text ( fnode
,  './width' ,  'width' ))  
 163                  height 
=  int_or_none ( xpath_text ( fnode
,  './height' ,  'height' ))  
 165                  filesize 
=  int_or_none ( xpath_text ( fnode
,  './filesize' ,  'filesize' ))  
 172                      'format_id' :  format_id
,  
 175                      'acodec' :  format_m
. group ( 'acodec' ),  
 176                      'vcodec' :  format_m
. group ( 'vcodec' ),  
 181                      'filesize' :  filesize
,  
 182                      'format_note' :  format_note
,  
 184                      '_available' :  is_available
,  
 186              format_ids
. append ( format_id
)  
 188          self
._ sort
_ formats
( formats
)  
 193              'description' :  description
,  
 194              'duration' :  duration
,  
 195              'thumbnails' :  thumbnails
,  
 196              'uploader' :  uploader
,  
 197              'uploader_id' :  uploader_id
,  
 198              'upload_date' :  upload_date
,  
 200              'subtitles' :  subtitles
,  
 203      def  _real_extract ( self
,  url
):  
 204          video_id 
=  self
._ match
_ id
( url
)  
 205          xml_url 
=  'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id= %s '  %  video_id
 
 206          return  self
. extract_from_xml_url ( video_id
,  xml_url
)  
 209  class  ZDFChannelIE ( InfoExtractor
):  
 210      _VALID_URL 
=  r
'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)'  
 212          'url' :  'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic' ,  
 218          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332' ,  
 219          'only_matching' :  True ,  
 221          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332' ,  
 222          'only_matching' :  True ,  
 224          'url' :  'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off' ,  
 225          'only_matching' :  True ,  
 229      def  _fetch_page ( self
,  channel_id
,  page
):  
 230          offset 
=  page 
*  self
._ PAGE
_ SIZE
 
 232              'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset= %d &maxLength= %d &id= %s '  
 233              % ( offset
,  self
._ PAGE
_ SIZE
,  channel_id
))  
 234          doc 
=  self
._ download
_ xml
(  
 236              note
= 'Downloading channel info' ,  
 237              errnote
= 'Failed to download channel info' )  
 239          title 
=  doc
. find ( './/information/title' ). text
 
 240          description 
=  doc
. find ( './/information/detail' ). text
 
 241          for  asset 
in  doc
. findall ( './/teasers/teaser' ):  
 242              a_type 
=  asset
. find ( './type' ). text
 
 243              a_id 
=  asset
. find ( './details/assetId' ). text
 
 244              if  a_type 
not in  ( 'video' ,  'topic' ):  
 248                  'playlist_title' :  title
,  
 249                  'playlist_description' :  description
,  
 250                  'url' :  'zdf: %s : %s '  % ( a_type
,  a_id
),  
 253      def  _real_extract ( self
,  url
):  
 254          channel_id 
=  self
._ match
_ id
( url
)  
 255          entries 
=  OnDemandPagedList (  
 256              functools
. partial ( self
._ fetch
_ page
,  channel_id
),  self
._ PAGE
_ SIZE
)