]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ard.py
e1ecdf4d3672215068bf223c282daa5f9609a02e
3 from .common
import InfoExtractor
class ARDIE(InfoExtractor):
    """Information extractor for ardmediathek.de / mediathek.daserste.de pages.

    The page HTML is scanned for ``mediaCollection.addMediaStream(...)``
    calls; each call describes one stream (media type, quality, RTMP URL and
    video URL).  The highest-quality stream of media type 0 is selected.
    """

    # Matches both supported hosts; the last path component is the fallback id.
    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
    # Page headline, optionally carrying the "boxTopHeadline" class.
    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
    # One JavaScript call per available stream.
    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'

    def _real_extract(self, url):
        """Extract id, title and stream location for the video at *url*.

        Args:
            url: Page URL; expected to match ``_VALID_URL``.

        Returns:
            A one-element list holding the info dict (``id``, ``title``,
            ``ext``, ``url`` and, for RTMP streams, ``play_path``).

        Raises:
            ExtractorError: the page lists no media streams.
            AssertionError: the page or the chosen stream does not look the
                way the extractor expects (see the asserts below).
        """
        # determine video id from url: prefer the numeric documentId query
        # parameter, fall back to the last path component otherwise
        numid = re.search(r'documentId=([0-9]+)', url)
        if numid:
            video_id = numid.group(1)
        else:
            m = re.match(self._VALID_URL, url)
            video_id = m.group('video_id')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)
        title = re.search(self._TITLE, html).group('title')
        streams = [
            match.groupdict()
            for match in re.finditer(self._MEDIA_STREAM, html)
        ]

        if not streams:
            # The only known reason for a stream-less page is the "fsk"
            # marker, which goes with the time restriction reported below.
            assert '"fsk"' in html
            raise ExtractorError(u'This video is only available after 8:00 pm')

        # choose default media type and highest quality for now
        default_streams = [s for s in streams if int(s["media_type"]) == 0]
        stream = max(default_streams, key=lambda s: int(s["quality"]))

        # there's two possibilities: RTMP stream or HTTP download
        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
        if stream['rtmp_url']:
            self.to_screen(u'RTMP download detected')
            assert stream['video_url'].startswith('mp4:')
            info["url"] = stream["rtmp_url"]
            info["play_path"] = stream['video_url']
        else:
            assert stream["video_url"].endswith('.mp4')
            info["url"] = stream["video_url"]

        # NOTE(review): the visible source never returns `info`. Returning it
        # in a one-element list assumes the list-of-dicts extractor contract;
        # confirm against InfoExtractor before merging.
        return [info]