]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zdf.py
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
20 class ZDFIE ( InfoExtractor
):
21 _VALID_URL
= r
'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
24 'url' : 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt' ,
28 'title' : 'ZDFspezial - Ende des Machtpokers' ,
29 'description' : 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".' ,
31 'uploader' : 'spezial' ,
32 'uploader_id' : '225948' ,
33 'upload_date' : '20131127' ,
35 'skip' : 'Videos on ZDF.de are depublicised in short order' ,
38 def _parse_smil_formats ( self
, smil
, smil_url
, video_id
, namespace
= None , f4m_params
= None , transform_rtmp_url
= None ):
40 for param_group
in smil
. findall ( self
._ xpath
_ ns
( './head/paramGroup' , namespace
)):
41 group_id
= param_group
. attrib
. get ( self
._ xpath
_ ns
( 'id' , 'http://www.w3.org/XML/1998/namespace' ))
43 for param
in param_group
:
44 params
[ param
. get ( 'name' )] = param
. get ( 'value' )
45 param_groups
[ group_id
] = params
48 for video
in smil
. findall ( self
._ xpath
_ ns
( './/video' , namespace
)):
49 src
= video
. get ( 'src' )
52 bitrate
= float_or_none ( video
. get ( 'system-bitrate' ) or video
. get ( 'systemBitrate' ), 1000 )
53 group_id
= video
. get ( 'paramGroup' )
54 param_group
= param_groups
[ group_id
]
55 for proto
in param_group
[ 'protocols' ]. split ( ',' ):
57 'url' : ' %s :// %s ' % ( proto
, param_group
[ 'host' ]),
58 'app' : param_group
[ 'app' ],
61 'format_id' : ' %s-%d ' % ( proto
, bitrate
),
64 self
._ sort
_ formats
( formats
)
67 def extract_from_xml_url ( self
, video_id
, xml_url
):
68 doc
= self
._ download
_ xml
(
70 note
= 'Downloading video info' ,
71 errnote
= 'Failed to download video info' )
73 status_code
= doc
. find ( './status/statuscode' )
74 if status_code
is not None and status_code
. text
!= 'ok' :
75 code
= status_code
. text
76 if code
== 'notVisibleAnymore' :
77 message
= 'Video %s is not available' % video_id
79 message
= ' %s returned error: %s ' % ( self
. IE_NAME
, code
)
80 raise ExtractorError ( message
, expected
= True )
82 title
= doc
. find ( './/information/title' ). text
83 description
= xpath_text ( doc
, './/information/detail' , 'description' )
84 duration
= int_or_none ( xpath_text ( doc
, './/details/lengthSec' , 'duration' ))
85 uploader
= xpath_text ( doc
, './/details/originChannelTitle' , 'uploader' )
86 uploader_id
= xpath_text ( doc
, './/details/originChannelId' , 'uploader id' )
87 upload_date
= unified_strdate ( xpath_text ( doc
, './/details/airtime' , 'upload date' ))
89 captions_url
= doc
. find ( './/caption/url' )
90 if captions_url
is not None :
92 'url' : captions_url
. text
,
96 def xml_to_thumbnails ( fnode
):
99 thumbnail_url
= node
. text
100 if not thumbnail_url
:
103 'url' : thumbnail_url
,
105 if 'key' in node
. attrib
:
106 m
= re
. match ( '^([0-9]+)x([0-9]+)$' , node
. attrib
[ 'key' ])
108 thumbnail
[ 'width' ] = int ( m
. group ( 1 ))
109 thumbnail
[ 'height' ] = int ( m
. group ( 2 ))
110 thumbnails
. append ( thumbnail
)
113 thumbnails
= xml_to_thumbnails ( doc
. findall ( './/teaserimages/teaserimage' ))
115 format_nodes
= doc
. findall ( './/formitaeten/formitaet' )
116 quality
= qualities ([ 'veryhigh' , 'high' , 'med' , 'low' ])
118 def get_quality ( elem
):
119 return quality ( xpath_text ( elem
, 'quality' ))
120 format_nodes
. sort ( key
= get_quality
)
123 for fnode
in format_nodes
:
124 video_url
= fnode
. find ( 'url' ). text
125 is_available
= 'http://www.metafilegenerator' not in video_url
128 format_id
= fnode
. attrib
[ 'basetype' ]
129 quality
= xpath_text ( fnode
, './quality' , 'quality' )
130 format_m
= re
. match ( r
'''(?x)
131 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
132 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
135 ext
= determine_ext ( video_url
, None ) or format_m
. group ( 'container' )
136 if ext
not in ( 'smil' , 'f4m' , 'm3u8' ):
137 format_id
= format_id
+ '-' + quality
138 if format_id
in format_ids
:
144 formats
. extend ( self
._ extract
_ smil
_ formats
(
145 video_url
, video_id
, fatal
= False ))
147 # the certificates are misconfigured (see
148 # https://github.com/rg3/youtube-dl/issues/8665)
149 if video_url
. startswith ( 'https://' ):
151 formats
. extend ( self
._ extract
_ m
3u8_ formats
(
152 video_url
, video_id
, 'mp4' , m3u8_id
= format_id
, fatal
= False ))
154 formats
. extend ( self
._ extract
_ f
4 m
_ formats
(
155 video_url
, video_id
, f4m_id
= format_id
, fatal
= False ))
157 proto
= format_m
. group ( 'proto' ). lower ()
159 abr
= int_or_none ( xpath_text ( fnode
, './audioBitrate' , 'abr' ), 1000 )
160 vbr
= int_or_none ( xpath_text ( fnode
, './videoBitrate' , 'vbr' ), 1000 )
162 width
= int_or_none ( xpath_text ( fnode
, './width' , 'width' ))
163 height
= int_or_none ( xpath_text ( fnode
, './height' , 'height' ))
165 filesize
= int_or_none ( xpath_text ( fnode
, './filesize' , 'filesize' ))
172 'format_id' : format_id
,
175 'acodec' : format_m
. group ( 'acodec' ),
176 'vcodec' : format_m
. group ( 'vcodec' ),
181 'filesize' : filesize
,
182 'format_note' : format_note
,
184 '_available' : is_available
,
186 format_ids
. append ( format_id
)
188 self
._ sort
_ formats
( formats
)
193 'description' : description
,
194 'duration' : duration
,
195 'thumbnails' : thumbnails
,
196 'uploader' : uploader
,
197 'uploader_id' : uploader_id
,
198 'upload_date' : upload_date
,
200 'subtitles' : subtitles
,
203 def _real_extract ( self
, url
):
204 video_id
= self
._ match
_ id
( url
)
205 xml_url
= 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id= %s ' % video_id
206 return self
. extract_from_xml_url ( video_id
, xml_url
)
209 class ZDFChannelIE ( InfoExtractor
):
210 _VALID_URL
= r
'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)'
212 'url' : 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic' ,
218 'url' : 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332' ,
219 'only_matching' : True ,
221 'url' : 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332' ,
222 'only_matching' : True ,
224 'url' : 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off' ,
225 'only_matching' : True ,
229 def _fetch_page ( self
, channel_id
, page
):
230 offset
= page
* self
._ PAGE
_ SIZE
232 'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset= %d &maxLength= %d &id= %s '
233 % ( offset
, self
._ PAGE
_ SIZE
, channel_id
))
234 doc
= self
._ download
_ xml
(
236 note
= 'Downloading channel info' ,
237 errnote
= 'Failed to download channel info' )
239 title
= doc
. find ( './/information/title' ). text
240 description
= doc
. find ( './/information/detail' ). text
241 for asset
in doc
. findall ( './/teasers/teaser' ):
242 a_type
= asset
. find ( './type' ). text
243 a_id
= asset
. find ( './details/assetId' ). text
244 if a_type
not in ( 'video' , 'topic' ):
248 'playlist_title' : title
,
249 'playlist_description' : description
,
250 'url' : 'zdf: %s : %s ' % ( a_type
, a_id
),
253 def _real_extract ( self
, url
):
254 channel_id
= self
._ match
_ id
( url
)
255 entries
= OnDemandPagedList (
256 functools
. partial ( self
._ fetch
_ page
, channel_id
), self
._ PAGE
_ SIZE
)