Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rtbf.py
Update upstream source from tag 'upstream/2019.09.28'
[youtubedl] / youtube_dl / extractor / rtbf.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 float_or_none,
10 int_or_none,
11 strip_or_none,
12 )
13
14
class RTBFIE(InfoExtractor):
    """Extractor for media pages on rtbf.be.

    ``_VALID_URL`` accepts ``video/``, ``ouftivi/`` and ``auvio/`` page URLs
    and captures the numeric media id.  On ``auvio/`` URLs the id may be
    given as ``lid=`` instead of ``id=``; the leading ``l`` is captured in
    the ``live`` group and selects the ``direct`` embed page.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?rtbf\.be/
        (?:
            video/[^?]+\?.*\bid=|
            ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
            auvio/[^/]+\?.*\b(?P<live>l)?id=
        )(?P<id>\d+)'''
    _TESTS = [{
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '8c876a1cceeb6cf31b476461ade72384',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'description': '(du 25/04/2014)',
            'duration': 3099.54,
            'upload_date': '20140425',
            'timestamp': 1398456300,
        }
    }, {
        # geo restricted
        'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
        'only_matching': True,
    }, {
        # Live
        'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
        'only_matching': True,
    }, {
        # Audio
        'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
        'only_matching': True,
    }, {
        # With Subtitle
        'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
        'only_matching': True,
    }]
    # Not referenced by any code in this class.
    _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
    # 'provider' value found in the media data -> name handed to url_result().
    _PROVIDERS = {
        'YOUTUBE': 'Youtube',
        'DAILYMOTION': 'Dailymotion',
        'VIMEO': 'Vimeo',
    }
    # (key looked up in the media data's 'sources' mapping, format_id to emit).
    _QUALITIES = [
        ('mobile', 'SD'),
        ('web', 'MD'),
        ('high', 'HD'),
    ]

    def _real_extract(self, url):
        """Build the info dict for an rtbf.be media URL.

        Downloads the Auvio embed page for the media id, decodes the JSON
        held in its ``data-media`` attribute and assembles formats and
        subtitles from it.

        Returns the result of ``url_result`` when the media data names a
        provider listed in ``_PROVIDERS``; otherwise an info dict.

        Raises ExtractorError (expected) when the media data carries a
        truthy ``error`` entry.
        """
        mobj = re.match(self._VALID_URL, url)
        live_marker = mobj.group('live')
        media_id = mobj.group('id')

        # Live URLs ("lid=") use the "direct" embed page, all others "media".
        embed_kind = 'direct' if live_marker else 'media'
        embed_page = self._download_webpage(
            'https://www.rtbf.be/auvio/embed/' + embed_kind,
            media_id, query={'id': media_id})
        media_json = self._html_search_regex(
            r'data-media="([^"]+)"', embed_page, 'media data')
        data = self._parse_json(media_json, media_id)

        error = data.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)

        # Media hosted by a known third party is delegated via url_result().
        provider = data.get('provider')
        if provider in self._PROVIDERS:
            return self.url_result(data['url'], self._PROVIDERS[provider])

        title = data['title']
        is_live = data.get('isLive')
        if is_live:
            title = self._live_title(title)

        # Matches the "-<height>p." marker embedded in media URLs.
        height_re = r'-(\d+)p\.'

        def fix_url(media_url):
            # URLs containing /geo/drm/ get their "rtbf-vod." host prefix
            # rewritten to "rtbf."; every other URL is returned untouched.
            if '/geo/drm/' in media_url:
                return media_url.replace('//rtbf-vod.', '//rtbf.')
            return media_url

        formats = []

        m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))

        http_url = data.get('url')
        if formats and http_url and re.search(height_re, http_url):
            # Derive one HTTP format per HLS format that reports a height by
            # substituting that height into the plain URL's "-<n>p." marker.
            http_url = fix_url(http_url)
            http_variants = []
            for hls_format in formats:
                height = hls_format.get('height')
                if not height:
                    continue
                http_format = hls_format.copy()
                del http_format['protocol']
                http_format.update({
                    'format_id': hls_format['format_id'].replace('hls-', 'http-'),
                    'url': re.sub(height_re, '-%dp.' % height, http_url),
                })
                http_variants.append(http_format)
            formats.extend(http_variants)
        else:
            # Otherwise use the per-quality URLs listed under 'sources'.
            sources = data.get('sources') or {}
            for source_key, format_id in self._QUALITIES:
                format_url = sources.get(source_key)
                if format_url:
                    height = int_or_none(self._search_regex(
                        height_re, format_url, 'height', default=None))
                    formats.append({
                        'format_id': format_id,
                        'url': fix_url(format_url),
                        'height': height,
                    })

        # DASH formats are only added when the media data has no truthy 'drm'.
        mpd_url = data.get('urlDash')
        if mpd_url and not data.get('drm'):
            formats.extend(self._extract_mpd_formats(
                mpd_url, media_id, mpd_id='dash', fatal=False))

        audio_url = data.get('urlAudio')
        if audio_url:
            formats.append({
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        # Group subtitle URLs by track language, defaulting to 'fr'.
        subtitles = {}
        tracks = data.get('tracks') or {}
        for track in tracks.values():
            sub_url = track.get('url')
            if sub_url:
                lang = track.get('lang') or 'fr'
                subtitles.setdefault(lang, []).append({
                    'url': sub_url,
                })

        info = {
            'id': media_id,
            'formats': formats,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'thumbnail': data.get('thumbnail'),
            'duration': float_or_none(data.get('realDuration')),
            'timestamp': int_or_none(data.get('liveFrom')),
            'series': data.get('programLabel'),
            'subtitles': subtitles,
            'is_live': is_live,
        }
        return info