Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rtvnh.py
d/p/disable-autoupdate-mechanism.patch: Extend to clean up errant import and README...
[youtubedl] / youtube_dl / extractor / rtvnh.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import ExtractorError
6
7
class RTVNHIE(InfoExtractor):
    """Extractor for video pages on rtvnh.nl.

    Metadata comes from the site's ``/video/json`` endpoint and the stream
    list from its ``/video/smil`` endpoint.  For every stream entry that
    carries a play path, RTSP, HLS and HDS variants are derived by swapping
    the URL scheme of the combined stream URL.
    """

    _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.rtvnh.nl/video/131946',
        'md5': 'cdbec9f44550763c8afc96050fa747dc',
        'info_dict': {
            'id': '131946',
            'ext': 'mp4',
            'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
            'thumbnail': r're:^https?:.*\.jpg$'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Raises ExtractorError (flagged as expected) when the JSON metadata
        reports a status other than 200.
        """
        video_id = self._match_id(url)

        meta = self._parse_json(self._download_webpage(
            'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)

        status = meta.get('status')
        if status != 200:
            # '%s' rather than '%d': the status may be absent (None) or not
            # a number, and '%d' would then raise TypeError and hide the
            # real error.  For integer codes the message is unchanged.
            raise ExtractorError(
                '%s returned error code %s' % (self.IE_NAME, status),
                expected=True)

        formats = []
        rtmp_formats = self._extract_smil_formats(
            'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
        formats.extend(rtmp_formats)

        for rtmp_format in rtmp_formats:
            play_path = rtmp_format.get('play_path')
            if not play_path:
                # Nothing to derive the alternative stream URLs from; the
                # entry itself is already part of `formats`.
                continue
            rtmp_url = '%s/%s' % (rtmp_format['url'], play_path)

            # The RTSP variant reuses the entry's metadata but addresses the
            # stream through a single URL, so the RTMP-specific play path
            # and the inherited extension are dropped.
            rtsp_format = rtmp_format.copy()
            rtsp_format.pop('play_path', None)
            rtsp_format.pop('ext', None)
            rtsp_format.update({
                'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
                'url': rtmp_url.replace('rtmp://', 'rtsp://'),
                'protocol': 'rtsp',
            })
            formats.append(rtsp_format)

            # HLS and HDS manifests are requested from the same path over
            # HTTP; both are optional (fatal=False).
            http_base_url = rtmp_url.replace('rtmp://', 'http://')
            formats.extend(self._extract_m3u8_formats(
                http_base_url + '/playlist.m3u8', video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))
            formats.extend(self._extract_f4m_formats(
                http_base_url + '/manifest.f4m',
                video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': meta['title'].strip(),
            'thumbnail': meta.get('image'),
            'formats': formats
        }