]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zdf.py
98f15177bd6665bd1c6b96a071d59d4b67e5d918
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
15 def extract_from_xml_url ( ie
, video_id
, xml_url
):
16 doc
= ie
._ download
_ xml
(
18 note
= 'Downloading video info' ,
19 errnote
= 'Failed to download video info' )
21 title
= doc
. find ( './/information/title' ). text
22 description
= doc
. find ( './/information/detail' ). text
23 duration
= int ( doc
. find ( './/details/lengthSec' ). text
)
24 uploader_node
= doc
. find ( './/details/originChannelTitle' )
25 uploader
= None if uploader_node
is None else uploader_node
. text
26 uploader_id_node
= doc
. find ( './/details/originChannelId' )
27 uploader_id
= None if uploader_id_node
is None else uploader_id_node
. text
28 upload_date
= unified_strdate ( doc
. find ( './/details/airtime' ). text
)
30 def xml_to_format ( fnode
):
31 video_url
= fnode
. find ( 'url' ). text
32 is_available
= 'http://www.metafilegenerator' not in video_url
34 format_id
= fnode
. attrib
[ 'basetype' ]
35 format_m
= re
. match ( r
'''(?x)
36 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
37 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
40 ext
= format_m
. group ( 'container' )
41 proto
= format_m
. group ( 'proto' ). lower ()
43 quality
= fnode
. find ( './quality' ). text
44 abr
= int ( fnode
. find ( './audioBitrate' ). text
) // 1000
45 vbr_node
= fnode
. find ( './videoBitrate' )
46 vbr
= None if vbr_node
is None else int ( vbr_node
. text
) // 1000
48 width_node
= fnode
. find ( './width' )
49 width
= None if width_node
is None else int_or_none ( width_node
. text
)
50 height_node
= fnode
. find ( './height' )
51 height
= None if height_node
is None else int_or_none ( height_node
. text
)
58 'format_id' : format_id
+ '-' + quality
,
61 'acodec' : format_m
. group ( 'acodec' ),
62 'vcodec' : format_m
. group ( 'vcodec' ),
67 'filesize' : int_or_none ( fnode
. find ( './filesize' ). text
),
68 'format_note' : format_note
,
70 '_available' : is_available
,
73 format_nodes
= doc
. findall ( './/formitaeten/formitaet' )
74 formats
= list ( filter (
75 lambda f
: f
[ '_available' ],
76 map ( xml_to_format
, format_nodes
)))
77 ie
._ sort
_ formats
( formats
)
82 'description' : description
,
85 'uploader_id' : uploader_id
,
86 'upload_date' : upload_date
,
91 class ZDFIE ( InfoExtractor
):
92 _VALID_URL
= r
'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
95 'url' : 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt' ,
99 'title' : 'ZDFspezial - Ende des Machtpokers' ,
100 'description' : 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".' ,
102 'uploader' : 'spezial' ,
103 'uploader_id' : '225948' ,
104 'upload_date' : '20131127' ,
106 'skip' : 'Videos on ZDF.de are depublicised in short order' ,
109 def _real_extract ( self
, url
):
110 video_id
= self
._ match
_ id
( url
)
111 xml_url
= 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id= %s ' % video_id
112 return extract_from_xml_url ( self
, video_id
, xml_url
)
115 class ZDFChannelIE ( InfoExtractor
):
116 _VALID_URL
= r
'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
118 'url' : 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic' ,
126 def _fetch_page ( self
, channel_id
, page
):
127 offset
= page
* self
._ PAGE
_ SIZE
129 'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset= %d &maxLength= %d &id= %s '
130 % ( offset
, self
._ PAGE
_ SIZE
, channel_id
))
131 doc
= self
._ download
_ xml
(
133 note
= 'Downloading channel info' ,
134 errnote
= 'Failed to download channel info' )
136 title
= doc
. find ( './/information/title' ). text
137 description
= doc
. find ( './/information/detail' ). text
138 for asset
in doc
. findall ( './/teasers/teaser' ):
139 a_type
= asset
. find ( './type' ). text
140 a_id
= asset
. find ( './details/assetId' ). text
141 if a_type
not in ( 'video' , 'topic' ):
145 'playlist_title' : title
,
146 'playlist_description' : description
,
147 'url' : 'zdf: %s : %s ' % ( a_type
, a_id
),
150 def _real_extract ( self
, url
):
151 channel_id
= self
._ match
_ id
( url
)
152 entries
= OnDemandPagedList (
153 functools
. partial ( self
._ fetch
_ page
, channel_id
), self
._ PAGE
_ SIZE
)