Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvnow.py
Update upstream source from tag 'upstream/2018.09.10'
[youtubedl] / youtube_dl / extractor / tvnow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 ExtractorError,
10 int_or_none,
11 parse_iso8601,
12 parse_duration,
13 try_get,
14 update_url_query,
15 )
16
17
class TVNowBaseIE(InfoExtractor):
    """Shared API access and video-metadata parsing for the TVNOW extractors."""

    # Fields requested from the API when fetching a single video.
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
        'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Download and return the JSON document at the v3 API endpoint *path*.

        *video_id* is forwarded to ``_download_json`` together with *query*.
        """
        return self._download_json(
            'https://api.tvnow.de/v3/' + path,
            video_id, query=query)

    def _extract_video(self, info, display_id):
        """Build the result dict for one video from its API metadata *info*.

        *info* must contain ``id`` and ``title``; every other key is optional.
        Raises ExtractorError when no manifest yields formats and the metadata
        marks the video as DRM protected or not free; a geoblocked video is
        reported through ``raise_geo_restricted``.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # Bound up front so that a missing or empty ``manifest`` mapping
        # reaches ``_sort_formats`` with an empty list instead of failing
        # on an unbound local.
        formats = []
        paths = []
        for manifest_url in (info.get('manifest') or {}).values():
            if not manifest_url:
                continue
            # Set the ``filter`` query parameter to an empty value.
            manifest_url = update_url_query(manifest_url, {'filter': ''})
            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
            # Several manifest entries may point at the same ``.ism`` path;
            # try each distinct path only once.
            if path in paths:
                continue
            paths.append(path)

            def url_repl(proto, suffix):
                # Swap the manifest file name after ``.ism/`` for *suffix*,
                # then replace the hls/dash/hss marker (followed by ``.`` or
                # ``-``) with *proto*.
                return re.sub(
                    r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                        r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                        '.ism/' + suffix, manifest_url))

            formats = self._extract_mpd_formats(
                url_repl('dash', '.mpd'), video_id,
                mpd_id='dash', fatal=False)
            formats.extend(self._extract_ism_formats(
                url_repl('hss', 'Manifest'),
                video_id, ism_id='mss', fatal=False))
            formats.extend(self._extract_m3u8_formats(
                url_repl('hls', '.m3u8'), video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))
            # The first manifest that produces any format is enough.
            if formats:
                break
        else:
            # No manifest produced formats: report the most specific reason
            # the metadata gives, if any.
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                raise self.raise_geo_restricted()
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # ``format`` may be absent or null in the API response; fall back to
        # an empty mapping in both cases (same handling as ``manifest``).
        f = info.get('format') or {}

        thumbnails = [{
            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
        }]
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
        if thumbnail:
            thumbnails.append({
                'url': thumbnail,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }
101
102
class TVNowIE(TVNowBaseIE):
    """Extractor for a single TVNOW episode page."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
                        (?P<show_id>[^/]+)/
                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the movie metadata for *url* from the API and parse it."""
        match = re.match(self._VALID_URL, url)
        station = match.group('station')
        # The API addresses a movie as "<show slug>/<episode slug>".
        display_id = '/'.join((match.group('show_id'), match.group('id')))

        api_query = {
            'fields': ','.join(self._VIDEO_FIELDS),
            'station': station,
        }
        video_info = self._call_api(
            'movies/' + display_id, display_id, query=api_query)

        return self._extract_video(video_info, display_id)
168
169
class TVNowListBaseIE(TVNowBaseIE):
    """Common base for the season-list and show extractors.

    Subclasses provide ``_SHOW_FIELDS``, ``_SEASON_FIELDS`` and
    ``_VIDEO_FIELDS``, which select what is requested from the API.
    """

    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
                            (?P<show_id>[^/]+)
                    )
                    '''

    def _extract_list_info(self, display_id, show_id):
        """Query ``formats/seo`` for the show *show_id* and return its JSON.

        The requested field list is the show fields, followed by the season
        fields prefixed with ``formatTabs.`` and the video fields prefixed
        with ``formatTabs.formatTabPages.container.movies.``.
        """
        show_fields = list(self._SHOW_FIELDS)
        season_fields = [
            'formatTabs.%s' % name for name in self._SEASON_FIELDS]
        video_fields = [
            'formatTabs.formatTabPages.container.movies.%s' % name
            for name in self._VIDEO_FIELDS]
        requested = show_fields + season_fields + video_fields

        api_query = {
            'fields': ','.join(requested),
            'name': show_id + '.php'
        }
        return self._call_api('formats/seo', display_id, query=api_query)
190
191
class TVNowListIE(TVNowListBaseIE):
    """Extractor for one season/year listing of a TVNOW show (a playlist)."""

    _VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL

    _SHOW_FIELDS = ('title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
        'info_dict': {
            'id': '28296',
            'title': '30 Minuten Deutschland - Aktuell',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs that TVNowIE already handles."""
        return (False if TVNowIE.suitable(url)
                else super(TVNowListIE, cls).suitable(url))

    def _real_extract(self, url):
        """Return a playlist of the episodes listed under the season in *url*.

        Raises ExtractorError when the API response has no season whose
        ``seoheadline`` equals the season slug taken from the URL.
        """
        base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()

        list_info = self._extract_list_info(season_id, show_id)

        # Use a default so a missing season is reported as an extractor
        # error rather than leaking a bare StopIteration to the caller.
        season = next(
            (tab for tab in list_info['formatTabs']['items']
             if tab.get('seoheadline') == season_id), None)
        if season is None:
            raise ExtractorError('Unable to find season %s' % season_id)

        # Playlist title is "<show> - <season>" when both are known,
        # otherwise whichever of the two is available.
        title = list_info.get('title')
        headline = season.get('headline')
        if title and headline:
            title = '%s - %s' % (title, headline)
        else:
            title = headline or title

        entries = []
        for container in season['formatTabPages']['items']:
            items = try_get(
                container, lambda x: x['container']['movies']['items'],
                list) or []
            for info in items:
                seo_url = info.get('seoUrl')
                # Without a slug there is no episode URL to hand over.
                if not seo_url:
                    continue
                video_id = info.get('id')
                entries.append(self.url_result(
                    '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
                    compat_str(video_id) if video_id else None))

        return self.playlist_result(
            entries, compat_str(season.get('id') or season_id), title)
251
252
class TVNowShowIE(TVNowListBaseIE):
    """Extractor for a TVNOW show page: a playlist of its season listings."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL

    _SHOW_FIELDS = ('id', 'title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    _VIDEO_FIELDS = ()

    _TESTS = [{
        'url': 'https://www.tvnow.at/vox/ab-ins-beet',
        'info_dict': {
            'id': 'ab-ins-beet',
            'title': 'Ab ins Beet!',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the episode or season-list extractors accept."""
        if TVNowIE.suitable(url) or TVNowListIE.suitable(url):
            return False
        return super(TVNowShowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one TVNowListIE entry per season of the show."""
        match = re.match(self._VALID_URL, url)
        base_url, show_id = match.groups()

        show_info = self._extract_list_info(show_id, show_id)

        def season_entries():
            for tab in show_info['formatTabs']['items']:
                slug = tab.get('seoheadline')
                # Seasons without a slug cannot be turned into a list URL.
                if not slug:
                    continue
                tab_id = tab.get('id')
                yield self.url_result(
                    '%s/list/%s' % (base_url, slug), TVNowListIE.ie_key(),
                    compat_str(tab_id) if tab_id else None,
                    tab.get('headline'))

        entries = list(season_entries())
        return self.playlist_result(entries, show_id, show_info.get('title'))