]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mdr.py
3 from .common
import InfoExtractor
9 class MDRIE(InfoExtractor
):
10 _VALID_URL
= r
'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
12 # No tests, MDR regularly deletes its videos
14 def _real_extract(self
, url
):
15 m
= re
.match(self
._VALID
_URL
, url
)
16 video_id
= m
.group('video_id')
17 domain
= m
.group('domain')
19 # determine title and media streams from webpage
20 html
= self
._download
_webpage
(url
, video_id
)
22 title
= self
._html
_search
_regex
(r
'<h2>(.*?)</h2>', html
, u
'title')
23 xmlurl
= self
._search
_regex
(
24 r
'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html
, u
'XML URL')
26 doc
= self
._download
_xml
(domain
+ xmlurl
, video_id
)
28 for a
in doc
.findall('./assets/asset'):
29 url_el
= a
.find('.//progressiveDownloadUrl')
32 abr
= int(a
.find('bitrateAudio').text
) // 1000
33 media_type
= a
.find('mediaType').text
36 'filesize': int(a
.find('fileSize').text
),
40 vbr_el
= a
.find('bitrateVideo')
44 'format_id': u
'%s-%d' % (media_type
, abr
),
47 vbr
= int(vbr_el
.text
) // 1000
50 'width': int(a
.find('frameWidth').text
),
51 'height': int(a
.find('frameHeight').text
),
52 'format_id': u
'%s-%d' % (media_type
, vbr
),
54 formats
.append(format
)
56 raise ExtractorError(u
'Could not find any valid formats')
58 self
._sort
_formats
(formats
)