]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zdf.py
a795f56b37bbd710c895b0e255342057a5aa354f
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
16 def extract_from_xml_url ( ie
, video_id
, xml_url
):
17 doc
= ie
._ download
_ xml
(
19 note
= 'Downloading video info' ,
20 errnote
= 'Failed to download video info' )
22 title
= doc
. find ( './/information/title' ). text
23 description
= xpath_text ( doc
, './/information/detail' , 'description' )
24 duration
= int_or_none ( xpath_text ( doc
, './/details/lengthSec' , 'duration' ))
25 uploader
= xpath_text ( doc
, './/details/originChannelTitle' , 'uploader' )
26 uploader_id
= xpath_text ( doc
, './/details/originChannelId' , 'uploader id' )
27 upload_date
= unified_strdate ( xpath_text ( doc
, './/details/airtime' , 'upload date' ))
29 def xml_to_format ( fnode
):
30 video_url
= fnode
. find ( 'url' ). text
31 is_available
= 'http://www.metafilegenerator' not in video_url
33 format_id
= fnode
. attrib
[ 'basetype' ]
34 format_m
= re
. match ( r
'''(?x)
35 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
36 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
39 ext
= format_m
. group ( 'container' )
40 proto
= format_m
. group ( 'proto' ). lower ()
42 quality
= xpath_text ( fnode
, './quality' , 'quality' )
43 abr
= int_or_none ( xpath_text ( fnode
, './audioBitrate' , 'abr' ), 1000 )
44 vbr
= int_or_none ( xpath_text ( fnode
, './videoBitrate' , 'vbr' ), 1000 )
46 width
= int_or_none ( xpath_text ( fnode
, './width' , 'width' ))
47 height
= int_or_none ( xpath_text ( fnode
, './height' , 'height' ))
49 filesize
= int_or_none ( xpath_text ( fnode
, './filesize' , 'filesize' ))
56 'format_id' : format_id
+ '-' + quality
,
59 'acodec' : format_m
. group ( 'acodec' ),
60 'vcodec' : format_m
. group ( 'vcodec' ),
66 'format_note' : format_note
,
68 '_available' : is_available
,
71 def xml_to_thumbnails ( fnode
):
74 thumbnail_url
= node
. text
80 if 'key' in node
. attrib
:
81 m
= re
. match ( '^([0-9]+)x([0-9]+)$' , node
. attrib
[ 'key' ])
83 thumbnail
[ 'width' ] = int ( m
. group ( 1 ))
84 thumbnail
[ 'height' ] = int ( m
. group ( 2 ))
85 thumbnails
. append ( thumbnail
)
88 thumbnails
= xml_to_thumbnails ( doc
. findall ( './/teaserimages/teaserimage' ))
90 format_nodes
= doc
. findall ( './/formitaeten/formitaet' )
91 formats
= list ( filter (
92 lambda f
: f
[ '_available' ],
93 map ( xml_to_format
, format_nodes
)))
94 ie
._ sort
_ formats
( formats
)
99 'description' : description
,
100 'duration' : duration
,
101 'thumbnails' : thumbnails
,
102 'uploader' : uploader
,
103 'uploader_id' : uploader_id
,
104 'upload_date' : upload_date
,
109 class ZDFIE ( InfoExtractor
):
110 _VALID_URL
= r
'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
113 'url' : 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt' ,
117 'title' : 'ZDFspezial - Ende des Machtpokers' ,
118 'description' : 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".' ,
120 'uploader' : 'spezial' ,
121 'uploader_id' : '225948' ,
122 'upload_date' : '20131127' ,
124 'skip' : 'Videos on ZDF.de are depublicised in short order' ,
def _real_extract(self, url):
    """Extract the video referenced by *url*.

    The video id is obtained from *url* with ``self._match_id``; the
    ``beitragsDetails`` XML service URL for that id is then built and the
    actual extraction is delegated to ``extract_from_xml_url``.

    Returns whatever ``extract_from_xml_url`` returns.
    """
    video_id = self._match_id(url)
    # The placeholder must sit directly after ``id=`` with nothing after
    # it: any whitespace around it ends up inside the request URL.
    xml_url = (
        'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails'
        '?ak=web&id=%s' % video_id)
    return extract_from_xml_url(self, video_id, xml_url)
133 class ZDFChannelIE ( InfoExtractor
):
134 _VALID_URL
= r
'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
136 'url' : 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic' ,
144 def _fetch_page ( self
, channel_id
, page
):
145 offset
= page
* self
._ PAGE
_ SIZE
147 'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset= %d &maxLength= %d &id= %s '
148 % ( offset
, self
._ PAGE
_ SIZE
, channel_id
))
149 doc
= self
._ download
_ xml
(
151 note
= 'Downloading channel info' ,
152 errnote
= 'Failed to download channel info' )
154 title
= doc
. find ( './/information/title' ). text
155 description
= doc
. find ( './/information/detail' ). text
156 for asset
in doc
. findall ( './/teasers/teaser' ):
157 a_type
= asset
. find ( './type' ). text
158 a_id
= asset
. find ( './details/assetId' ). text
159 if a_type
not in ( 'video' , 'topic' ):
163 'playlist_title' : title
,
164 'playlist_description' : description
,
165 'url' : 'zdf: %s : %s ' % ( a_type
, a_id
),
168 def _real_extract ( self
, url
):
169 channel_id
= self
._ match
_ id
( url
)
170 entries
= OnDemandPagedList (
171 functools
. partial ( self
._ fetch
_ page
, channel_id
), self
._ PAGE
_ SIZE
)