from __future__ import unicode_literals

import re

from .common import InfoExtractor
class MDRIE(InfoExtractor):
    """Extractor for video and audio pages hosted on mdr.de."""

    # Captures the site root (``domain``), the media kind (``type``) and the
    # identifier that follows the ``video``/``audio`` prefix (``video_id``).
    _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'

    # No download tests, MDR regularly deletes its videos
    _TEST = {
        'url': 'http://www.mdr.de/fakt/video189002.html',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Collect title and progressive-download formats for an MDR page.

        The page at ``url`` is fetched, its first ``<h1>``/``<h2>`` is taken
        as the title, and the ``dataURL`` it references (an ``-avCustom.xml``
        document on the same domain) is downloaded and scanned for
        ``assets/asset`` entries.

        Returns a dict with the keys ``id``, ``title`` and ``formats``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('video_id')
        domain = m.group('domain')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
        xmlurl = self._search_regex(
            r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')

        # xmlurl is a site-absolute path (the pattern requires a leading '/'),
        # so it is resolved against the domain captured from the page URL.
        doc = self._download_xml(domain + xmlurl, video_id)

        formats = []
        for a in doc.findall('./assets/asset'):
            url_el = a.find('.//progressiveDownloadUrl')
            if url_el is None:
                # Nothing to download for this asset.
                continue

            # presumably bit/s in the XML, scaled down to kbit/s -- confirm
            abr = int(a.find('bitrateAudio').text) // 1000
            media_type = a.find('mediaType').text
            # Named ``fmt`` rather than ``format`` to avoid shadowing the
            # builtin of the same name.
            fmt = {
                'abr': abr,
                'filesize': int(a.find('fileSize').text),
                'url': url_el.text,
            }

            vbr_el = a.find('bitrateVideo')
            if vbr_el is None:
                # No video bitrate element: treat the asset as audio only and
                # identify it by its audio bitrate.
                fmt.update({
                    'vcodec': 'none',
                    'format_id': '%s-%d' % (media_type, abr),
                })
            else:
                vbr = int(vbr_el.text) // 1000
                fmt.update({
                    'vbr': vbr,
                    'width': int(a.find('frameWidth').text),
                    'height': int(a.find('frameHeight').text),
                    'format_id': '%s-%d' % (media_type, vbr),
                })
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }