]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dreisat.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
17 class DreiSatIE(InfoExtractor
):
19 _VALID_URL
= r
'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
22 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
23 'md5': 'be37228896d30a88f315b638900a026e',
27 'title': 'Waidmannsheil',
28 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
29 'uploader': 'SCHWEIZWEIT',
30 'uploader_id': '100000210',
31 'upload_date': '20140913'
34 'skip_download': True, # m3u8 downloads
38 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
39 'only_matching': True,
43 def _parse_smil_formats(self
, smil
, smil_url
, video_id
, namespace
=None, f4m_params
=None, transform_rtmp_url
=None):
45 for param_group
in smil
.findall(self
._xpath
_ns
('./head/paramGroup', namespace
)):
46 group_id
= param_group
.attrib
.get(self
._xpath
_ns
('id', 'http://www.w3.org/XML/1998/namespace'))
48 for param
in param_group
:
49 params
[param
.get('name')] = param
.get('value')
50 param_groups
[group_id
] = params
53 for video
in smil
.findall(self
._xpath
_ns
('.//video', namespace
)):
54 src
= video
.get('src')
57 bitrate
= float_or_none(video
.get('system-bitrate') or video
.get('systemBitrate'), 1000)
58 group_id
= video
.get('paramGroup')
59 param_group
= param_groups
[group_id
]
60 for proto
in param_group
['protocols'].split(','):
62 'url': '%s://%s' % (proto
, param_group
['host']),
63 'app': param_group
['app'],
66 'format_id': '%s-%d' % (proto
, bitrate
),
69 self
._sort
_formats
(formats
)
72 def extract_from_xml_url(self
, video_id
, xml_url
):
73 doc
= self
._download
_xml
(
75 note
='Downloading video info',
76 errnote
='Failed to download video info')
78 status_code
= doc
.find('./status/statuscode')
79 if status_code
is not None and status_code
.text
!= 'ok':
80 code
= status_code
.text
81 if code
== 'notVisibleAnymore':
82 message
= 'Video %s is not available' % video_id
84 message
= '%s returned error: %s' % (self
.IE_NAME
, code
)
85 raise ExtractorError(message
, expected
=True)
87 title
= doc
.find('.//information/title').text
88 description
= xpath_text(doc
, './/information/detail', 'description')
89 duration
= int_or_none(xpath_text(doc
, './/details/lengthSec', 'duration'))
90 uploader
= xpath_text(doc
, './/details/originChannelTitle', 'uploader')
91 uploader_id
= xpath_text(doc
, './/details/originChannelId', 'uploader id')
92 upload_date
= unified_strdate(xpath_text(doc
, './/details/airtime', 'upload date'))
94 def xml_to_thumbnails(fnode
):
97 thumbnail_url
= node
.text
101 'url': thumbnail_url
,
103 if 'key' in node
.attrib
:
104 m
= re
.match('^([0-9]+)x([0-9]+)$', node
.attrib
['key'])
106 thumbnail
['width'] = int(m
.group(1))
107 thumbnail
['height'] = int(m
.group(2))
108 thumbnails
.append(thumbnail
)
111 thumbnails
= xml_to_thumbnails(doc
.findall('.//teaserimages/teaserimage'))
113 format_nodes
= doc
.findall('.//formitaeten/formitaet')
114 quality
= qualities(['veryhigh', 'high', 'med', 'low'])
116 def get_quality(elem
):
117 return quality(xpath_text(elem
, 'quality'))
118 format_nodes
.sort(key
=get_quality
)
121 for fnode
in format_nodes
:
122 video_url
= fnode
.find('url').text
123 is_available
= 'http://www.metafilegenerator' not in video_url
126 format_id
= fnode
.attrib
['basetype']
127 quality
= xpath_text(fnode
, './quality', 'quality')
128 format_m
= re
.match(r
'''(?x)
129 (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
130 (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
133 ext
= determine_ext(video_url
, None) or format_m
.group('container')
134 if ext
not in ('smil', 'f4m', 'm3u8'):
135 format_id
= format_id
+ '-' + quality
136 if format_id
in format_ids
:
142 formats
.extend(self
._extract
_smil
_formats
(
143 video_url
, video_id
, fatal
=False))
145 # the certificates are misconfigured (see
146 # https://github.com/rg3/youtube-dl/issues/8665)
147 if video_url
.startswith('https://'):
149 formats
.extend(self
._extract
_m
3u8_formats
(
150 video_url
, video_id
, 'mp4', m3u8_id
=format_id
, fatal
=False))
152 formats
.extend(self
._extract
_f
4m
_formats
(
153 video_url
, video_id
, f4m_id
=format_id
, fatal
=False))
155 proto
= format_m
.group('proto').lower()
157 abr
= int_or_none(xpath_text(fnode
, './audioBitrate', 'abr'), 1000)
158 vbr
= int_or_none(xpath_text(fnode
, './videoBitrate', 'vbr'), 1000)
160 width
= int_or_none(xpath_text(fnode
, './width', 'width'))
161 height
= int_or_none(xpath_text(fnode
, './height', 'height'))
163 filesize
= int_or_none(xpath_text(fnode
, './filesize', 'filesize'))
170 'format_id': format_id
,
173 'acodec': format_m
.group('acodec'),
174 'vcodec': format_m
.group('vcodec'),
179 'filesize': filesize
,
180 'format_note': format_note
,
182 '_available': is_available
,
184 format_ids
.append(format_id
)
186 self
._sort
_formats
(formats
)
191 'description': description
,
192 'duration': duration
,
193 'thumbnails': thumbnails
,
194 'uploader': uploader
,
195 'uploader_id': uploader_id
,
196 'upload_date': upload_date
,
def _real_extract(self, url):
    """Extract the video referenced by a 3sat Mediathek URL.

    The numeric ``obj`` id is taken from *url* via the ``id`` group of
    ``self._VALID_URL``; the ``beitragsDetails`` XML service URL for that
    id is then handed to ``self.extract_from_xml_url``.

    Returns whatever ``extract_from_xml_url(video_id, details_url)``
    returns.
    """
    # NOTE: url is assumed to match _VALID_URL; re.match() yields None
    # otherwise and the .group() call below would fail.
    mobj = re.match(self._VALID_URL, url)
    video_id = mobj.group('id')
    details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
    return self.extract_from_xml_url(video_id, details_url)