]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ndr.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
14 class NDRIE(InfoExtractor
):
16 IE_DESC
= 'NDR.de - Mediathek'
17 _VALID_URL
= r
'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
21 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html',
22 'md5': '4a4eeafd17c3058b65f0c8f091355855',
27 'title': 'Blaue Bohnen aus Blocken',
28 'description': 'md5:190d71ba2ccddc805ed01547718963bc',
33 'url': 'http://www.ndr.de/info/audio51535.html',
34 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
39 'title': 'La Valette entgeht der Hinrichtung',
40 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
46 def _real_extract(self
, url
):
47 mobj
= re
.match(self
._VALID
_URL
, url
)
48 video_id
= mobj
.group('id')
50 page
= self
._download
_webpage
(url
, video_id
, 'Downloading page')
52 title
= self
._og
_search
_title
(page
).strip()
53 description
= self
._og
_search
_description
(page
)
55 description
= description
.strip()
57 duration
= int_or_none(self
._html
_search
_regex
(r
'duration: (\d+),\n', page
, 'duration', fatal
=False))
61 mp3_url
= re
.search(r
'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page
)
64 'url': mp3_url
.group('audio'),
70 video_url
= re
.search(r
'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page
)
72 thumbnails
= re
.findall(r
'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page
)
74 quality_key
= qualities(['xs', 's', 'm', 'l', 'xl'])
75 largest
= max(thumbnails
, key
=lambda thumb
: quality_key(thumb
[1]))
76 thumbnail
= 'http://www.ndr.de' + largest
[0]
78 for format_id
in 'lo', 'hi', 'hq':
80 'url': '%s.%s.mp4' % (video_url
.group('video'), format_id
),
81 'format_id': format_id
,
85 raise ExtractorError('No media links available for %s' % video_id
)
90 'description': description
,
91 'thumbnail': thumbnail
,