]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ard.py
5793a4129d21c8c36e20d1b57607629e3adb94fd
import re

from .common import InfoExtractor
from ..utils import ExtractorError
class ARDIE(InfoExtractor):
    """Extractor for videos hosted on ardmediathek.de / mediathek.daserste.de.

    The video page is scraped for its headline and for the
    ``mediaCollection.addMediaStream(...)`` calls that describe the
    available streams.
    """

    # Accepted page URLs; the last path component is captured as ``video_id``.
    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
    # Page headline, captured as ``title``.
    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
    # One stream declaration: media type, quality, RTMP URL and video URL.
    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
    # NOTE(review): the lines opening this dict and the nested dict holding
    # "title" were lost in extraction; the ``_TEST`` / ``info_dict`` layout is
    # assumed from the surrounding keys -- confirm against the test runner.
    _TEST = {
        u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640',
        u'file': u'14077640.mp4',
        u'md5': u'6ca8824255460c787376353f9e20bbd8',
        u'info_dict': {
            u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Scrape the page at *url* and describe its best default stream.

        Returns a one-element list holding a dict with ``id``, ``title``,
        ``ext`` and ``url`` (plus ``play_path`` for RTMP streams).

        Raises ExtractorError when the URL or the page cannot be interpreted.
        """
        # determine video id from url: prefer the numeric documentId query
        # parameter, fall back to the last path component
        numid = re.search(r'documentId=([0-9]+)', url)
        if numid:
            video_id = numid.group(1)
        else:
            url_match = re.match(self._VALID_URL, url)
            if url_match is None:
                raise ExtractorError(u'Invalid URL: %s' % url)
            video_id = url_match.group('video_id')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)

        title_match = re.search(self._TITLE, html)
        if title_match is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_match.group('title')

        # A distinct loop name keeps the comprehension from clobbering other
        # locals on Python 2, where comprehension variables leak.
        streams = [mobj.groupdict()
                   for mobj in re.finditer(self._MEDIA_STREAM, html)]
        if not streams:
            # Explicit raises instead of ``assert``: asserts vanish under -O.
            if '"fsk"' not in html:
                raise ExtractorError(u'No media streams found')
            raise ExtractorError(u'This video is only available after 8:00 pm')

        # choose default media type and highest quality for now
        default_streams = [s for s in streams if int(s["media_type"]) == 0]
        if not default_streams:
            raise ExtractorError(u'No stream with the default media type found')
        stream = max(default_streams, key=lambda s: int(s["quality"]))

        # there's two possibilities: RTMP stream or HTTP download
        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
        if stream['rtmp_url']:
            self.to_screen(u'RTMP download detected')
            if not stream['video_url'].startswith('mp4:'):
                raise ExtractorError(
                    u'Unexpected RTMP play path: %s' % stream['video_url'])
            info["url"] = stream["rtmp_url"]
            info["play_path"] = stream['video_url']
        else:
            if not stream["video_url"].endswith('.mp4'):
                raise ExtractorError(
                    u'Unexpected video URL: %s' % stream["video_url"])
            info["url"] = stream["video_url"]

        # NOTE(review): the return statement is not visible in the extracted
        # source; a single-element list is assumed -- confirm with the caller.
        return [info]