]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zdf.py
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
20 class ZDFIE ( InfoExtractor
):
21 _VALID_URL
= r
'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
24 'url' : 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt' ,
28 'title' : 'ZDFspezial - Ende des Machtpokers' ,
29 'description' : 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".' ,
31 'uploader' : 'spezial' ,
32 'uploader_id' : '225948' ,
33 'upload_date' : '20131127' ,
35 'skip' : 'Videos on ZDF.de are depublicised in short order' ,
38 def _parse_smil_formats ( self
, smil
, smil_url
, video_id
, namespace
= None , f4m_params
= None , transform_rtmp_url
= None ):
40 for param_group
in smil
. findall ( self
._ xpath
_ ns
( './head/paramGroup' , namespace
)):
41 group_id
= param_group
. attrib
. get ( self
._ xpath
_ ns
( 'id' , 'http://www.w3.org/XML/1998/namespace' ))
43 for param
in param_group
:
44 params
[ param
. get ( 'name' )] = param
. get ( 'value' )
45 param_groups
[ group_id
] = params
48 for video
in smil
. findall ( self
._ xpath
_ ns
( './/video' , namespace
)):
49 src
= video
. get ( 'src' )
52 bitrate
= float_or_none ( video
. get ( 'system-bitrate' ) or video
. get ( 'systemBitrate' ), 1000 )
53 group_id
= video
. get ( 'paramGroup' )
54 param_group
= param_groups
[ group_id
]
55 for proto
in param_group
[ 'protocols' ]. split ( ',' ):
57 'url' : ' %s :// %s ' % ( proto
, param_group
[ 'host' ]),
58 'app' : param_group
[ 'app' ],
61 'format_id' : ' %s-%d ' % ( proto
, bitrate
),
64 self
._ sort
_ formats
( formats
)
67 def extract_from_xml_url ( self
, video_id
, xml_url
):
68 doc
= self
._ download
_ xml
(
70 note
= 'Downloading video info' ,
71 errnote
= 'Failed to download video info' )
73 status_code
= doc
. find ( './status/statuscode' )
74 if status_code
is not None and status_code
. text
!= 'ok' :
75 code
= status_code
. text
76 if code
== 'notVisibleAnymore' :
77 message
= 'Video %s is not available' % video_id
79 message
= ' %s returned error: %s ' % ( self
. IE_NAME
, code
)
80 raise ExtractorError ( message
, expected
= True )
82 title
= doc
. find ( './/information/title' ). text
83 description
= xpath_text ( doc
, './/information/detail' , 'description' )
84 duration
= int_or_none ( xpath_text ( doc
, './/details/lengthSec' , 'duration' ))
85 uploader
= xpath_text ( doc
, './/details/originChannelTitle' , 'uploader' )
86 uploader_id
= xpath_text ( doc
, './/details/originChannelId' , 'uploader id' )
87 upload_date
= unified_strdate ( xpath_text ( doc
, './/details/airtime' , 'upload date' ))
89 def xml_to_thumbnails ( fnode
):
92 thumbnail_url
= node
. text
98 if 'key' in node
. attrib
:
99 m
= re
. match ( '^([0-9]+)x([0-9]+)$' , node
. attrib
[ 'key' ])
101 thumbnail
[ 'width' ] = int ( m
. group ( 1 ))
102 thumbnail
[ 'height' ] = int ( m
. group ( 2 ))
103 thumbnails
. append ( thumbnail
)
106 thumbnails
= xml_to_thumbnails ( doc
. findall ( './/teaserimages/teaserimage' ))
108 format_nodes
= doc
. findall ( './/formitaeten/formitaet' )
109 quality
= qualities ([ 'veryhigh' , 'high' , 'med' , 'low' ])
111 def get_quality ( elem
):
112 return quality ( xpath_text ( elem
, 'quality' ))
113 format_nodes
. sort ( key
= get_quality
)
116 for fnode
in format_nodes
:
117 video_url
= fnode
. find ( 'url' ). text
118 is_available
= 'http://www.metafilegenerator' not in video_url
121 format_id
= fnode
. attrib
[ 'basetype' ]
122 quality
= xpath_text ( fnode
, './quality' , 'quality' )
123 format_m
= re
. match ( r
'''(?x)
124 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
125 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
128 ext
= determine_ext ( video_url
, None ) or format_m
. group ( 'container' )
129 if ext
not in ( 'smil' , 'f4m' , 'm3u8' ):
130 format_id
= format_id
+ '-' + quality
131 if format_id
in format_ids
:
137 formats
. extend ( self
._ extract
_ smil
_ formats
(
138 video_url
, video_id
, fatal
= False ))
140 formats
. extend ( self
._ extract
_ m
3u8_ formats
(
141 video_url
, video_id
, 'mp4' , m3u8_id
= format_id
, fatal
= False ))
143 formats
. extend ( self
._ extract
_ f
4 m
_ formats
(
144 video_url
, video_id
, f4m_id
= format_id
, fatal
= False ))
146 proto
= format_m
. group ( 'proto' ). lower ()
148 abr
= int_or_none ( xpath_text ( fnode
, './audioBitrate' , 'abr' ), 1000 )
149 vbr
= int_or_none ( xpath_text ( fnode
, './videoBitrate' , 'vbr' ), 1000 )
151 width
= int_or_none ( xpath_text ( fnode
, './width' , 'width' ))
152 height
= int_or_none ( xpath_text ( fnode
, './height' , 'height' ))
154 filesize
= int_or_none ( xpath_text ( fnode
, './filesize' , 'filesize' ))
161 'format_id' : format_id
,
164 'acodec' : format_m
. group ( 'acodec' ),
165 'vcodec' : format_m
. group ( 'vcodec' ),
170 'filesize' : filesize
,
171 'format_note' : format_note
,
173 '_available' : is_available
,
175 format_ids
. append ( format_id
)
177 self
._ sort
_ formats
( formats
)
182 'description' : description
,
183 'duration' : duration
,
184 'thumbnails' : thumbnails
,
185 'uploader' : uploader
,
186 'uploader_id' : uploader_id
,
187 'upload_date' : upload_date
,
191 def _real_extract ( self
, url
):
192 video_id
= self
._ match
_ id
( url
)
193 xml_url
= 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id= %s ' % video_id
194 return self
. extract_from_xml_url ( video_id
, xml_url
)
197 class ZDFChannelIE ( InfoExtractor
):
198 _VALID_URL
= r
'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)'
200 'url' : 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic' ,
206 'url' : 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332' ,
207 'only_matching' : True ,
209 'url' : 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332' ,
210 'only_matching' : True ,
212 'url' : 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off' ,
213 'only_matching' : True ,
217 def _fetch_page ( self
, channel_id
, page
):
218 offset
= page
* self
._ PAGE
_ SIZE
220 'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset= %d &maxLength= %d &id= %s '
221 % ( offset
, self
._ PAGE
_ SIZE
, channel_id
))
222 doc
= self
._ download
_ xml
(
224 note
= 'Downloading channel info' ,
225 errnote
= 'Failed to download channel info' )
227 title
= doc
. find ( './/information/title' ). text
228 description
= doc
. find ( './/information/detail' ). text
229 for asset
in doc
. findall ( './/teasers/teaser' ):
230 a_type
= asset
. find ( './type' ). text
231 a_id
= asset
. find ( './details/assetId' ). text
232 if a_type
not in ( 'video' , 'topic' ):
236 'playlist_title' : title
,
237 'playlist_description' : description
,
238 'url' : 'zdf: %s : %s ' % ( a_type
, a_id
),
241 def _real_extract ( self
, url
):
242 channel_id
= self
._ match
_ id
( url
)
243 entries
= OnDemandPagedList (
244 functools
. partial ( self
._ fetch
_ page
, channel_id
), self
._ PAGE
_ SIZE
)