]> Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvnow.py
d/rules: Run offline tests.
[youtubedl] / youtube_dl / extractor / tvnow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 ExtractorError,
10 int_or_none,
11 parse_iso8601,
12 parse_duration,
13 str_or_none,
14 update_url_query,
15 urljoin,
16 )
17
18
class TVNowBaseIE(InfoExtractor):
    """Shared helpers for extractors that talk to the api.tvnow.de v3 API."""

    # Field names joined into the ``fields`` query parameter by callers.
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
        'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Download and return the JSON found at ``path`` of the v3 API."""
        return self._download_json(
            'https://api.tvnow.de/v3/' + path, video_id, query=query)

    @staticmethod
    def _manifest_urls(manifest_url, proto, suffix):
        """Return ``manifest_url`` rewritten for ``proto`` plus its '/ngvod/' variant.

        The part after ``.ism/`` is replaced with ``suffix`` and any
        ``hls``/``dash``/``hss`` token followed by ``.`` or ``-`` is replaced
        with ``proto``.  A second URL with '/manifest/' swapped for '/ngvod/'
        is appended only when that substitution changes the URL.
        """
        url = re.sub(
            r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                '.ism/' + suffix, manifest_url))
        urls = [url]
        ngvod_url = url.replace('/manifest/', '/ngvod/')
        if ngvod_url != url:
            urls.append(ngvod_url)
        return urls

    def _extract_video(self, info, display_id):
        """Build the info dict for one video from its API metadata.

        ``info`` is the mapping returned by the API; ``display_id`` is passed
        through unchanged.  When no manifest yields any format, an
        ExtractorError is raised for videos flagged ``isDrm`` or not ``free``,
        and ``raise_geo_restricted()`` is invoked for ``geoblocked`` ones.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # Defined up front so that an empty/missing manifest mapping cannot
        # leave ``formats`` unbound further down.
        formats = []
        paths = []
        for manifest_url in (info.get('manifest') or {}).values():
            if not manifest_url:
                continue
            manifest_url = update_url_query(manifest_url, {'filter': ''})
            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
            # Several manifest entries may point at the same .ism path
            if path in paths:
                continue
            paths.append(path)

            # Accumulate (rather than overwrite) so that the formats of the
            # first DASH URL survive when an '/ngvod/' variant exists too.
            for man_url in self._manifest_urls(manifest_url, 'dash', '.mpd'):
                formats.extend(self._extract_mpd_formats(
                    man_url, video_id, mpd_id='dash', fatal=False))
            for man_url in self._manifest_urls(manifest_url, 'hss', 'Manifest'):
                formats.extend(self._extract_ism_formats(
                    man_url, video_id, ism_id='mss', fatal=False))
            for man_url in self._manifest_urls(manifest_url, 'hls', '.m3u8'):
                formats.extend(self._extract_m3u8_formats(
                    man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                    fatal=False))
            if formats:
                break
        else:
            # Reached only when no manifest produced any format
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                self.raise_geo_restricted()
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # ``format`` may be present but null, hence ``or {}`` instead of a default
        f = info.get('format') or {}

        thumbnails = [{
            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
        }]
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
        if thumbnail:
            thumbnails.append({
                'url': thumbnail,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }
109
110
class TVNowIE(TVNowBaseIE):
    """Extractor for old-style tvnow URLs (<station>/<show>/<episode>)."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
                        (?P<show_id>[^/]+)/
                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                    '''

    @classmethod
    def suitable(cls, url):
        """Decline URLs that one of the more specific tvnow extractors accepts."""
        more_specific = (TVNowNewIE, TVNowSeasonIE, TVNowAnnualIE, TVNowShowIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(TVNowIE, cls).suitable(url)

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the movie metadata for ``url`` and turn it into an info dict."""
        match = re.match(self._VALID_URL, url)
        # The display id is "<show>/<episode>"; the station part is not used
        display_id = '/'.join(match.group('show_id', 'id'))

        fields = ','.join(self._VIDEO_FIELDS)
        info = self._call_api(
            'movies/' + display_id, display_id, query={'fields': fields})

        return self._extract_video(info, display_id)
179
180
class TVNowNewIE(InfoExtractor):
    """Extractor for new-style episode URLs; delegates to TVNowIE."""

    _VALID_URL = r'''(?x)
                    (?P<base_url>https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/
                        (?:shows|serien))/
                        (?P<show>[^/]+)-\d+/
                        [^/]+/
                        episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing TVNowIE at the old-format URL."""
        match = re.match(self._VALID_URL, url)
        # "_" takes the place of the station segment of the old URL layout
        legacy_base = re.sub(r'(?:shows|serien)', '_', match.group('base_url'))
        # Rewrite new URLs to the old format and use extraction via old API
        # at api.tvnow.de as a loophole for bypassing premium content checks
        legacy_url = '%s/%s/%s' % (
            legacy_base, match.group('show'), match.group('episode'))
        return self.url_result(
            legacy_url, ie=TVNowIE.ie_key(), video_id=match.group('id'))
205
206
class TVNowNewBaseIE(InfoExtractor):
    """Shared helper for extractors that talk to the apigw.tvnow.de API."""

    def _call_api(self, path, video_id, query=None):
        """Download ``path`` from the module API and return the parsed JSON.

        ``query`` is an optional mapping of query parameters; omitting it is
        equivalent to passing an empty mapping.  Raises ExtractorError
        (expected) when the response carries a truthy ``error`` entry.
        """
        # ``None`` default instead of a shared mutable ``{}`` default argument
        result = self._download_json(
            'https://apigw.tvnow.de/module/' + path, video_id,
            query=query or {})
        error = result.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        return result
216
217
218 r"""
219 TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
220 when api.tvnow.de is shut down. This version can't bypass premium checks though.
221 class TVNowIE(TVNowNewBaseIE):
222 _VALID_URL = r'''(?x)
223 https?://
224 (?:www\.)?tvnow\.(?:de|at|ch)/
225 (?:shows|serien)/[^/]+/
226 (?:[^/]+/)+
227 (?P<display_id>[^/?$&]+)-(?P<id>\d+)
228 '''
229
230 _TESTS = [{
231 # episode with annual navigation
232 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
233 'info_dict': {
234 'id': '331082',
235 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
236 'ext': 'mp4',
237 'title': 'Der neue Porsche 911 GT 3',
238 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
239 'thumbnail': r're:^https?://.*\.jpg$',
240 'timestamp': 1495994400,
241 'upload_date': '20170528',
242 'duration': 5283,
243 'series': 'GRIP - Das Motormagazin',
244 'season_number': 14,
245 'episode_number': 405,
246 'episode': 'Der neue Porsche 911 GT 3',
247 },
248 }, {
249 # rtl2, episode with season navigation
250 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
251 'only_matching': True,
252 }, {
253 # rtlnitro
254 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
255 'only_matching': True,
256 }, {
257 # superrtl
258 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
259 'only_matching': True,
260 }, {
261 # ntv
262 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
263 'only_matching': True,
264 }, {
265 # vox
266 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
267 'only_matching': True,
268 }, {
269 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
270 'only_matching': True,
271 }]
272
273 def _extract_video(self, info, url, display_id):
274 config = info['config']
275 source = config['source']
276
277 video_id = compat_str(info.get('id') or source['videoId'])
278 title = source['title'].strip()
279
280 paths = []
281 for manifest_url in (info.get('manifest') or {}).values():
282 if not manifest_url:
283 continue
284 manifest_url = update_url_query(manifest_url, {'filter': ''})
285 path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
286 if path in paths:
287 continue
288 paths.append(path)
289
290 def url_repl(proto, suffix):
291 return re.sub(
292 r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
293 r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
294 '.ism/' + suffix, manifest_url))
295
296 formats = self._extract_mpd_formats(
297 url_repl('dash', '.mpd'), video_id,
298 mpd_id='dash', fatal=False)
299 formats.extend(self._extract_ism_formats(
300 url_repl('hss', 'Manifest'),
301 video_id, ism_id='mss', fatal=False))
302 formats.extend(self._extract_m3u8_formats(
303 url_repl('hls', '.m3u8'), video_id, 'mp4',
304 'm3u8_native', m3u8_id='hls', fatal=False))
305 if formats:
306 break
307 else:
308 if try_get(info, lambda x: x['rights']['isDrm']):
309 raise ExtractorError(
310 'Video %s is DRM protected' % video_id, expected=True)
311 if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
312 raise self.raise_geo_restricted()
313 if not info.get('free', True):
314 raise ExtractorError(
315 'Video %s is not available for free' % video_id, expected=True)
316 self._sort_formats(formats)
317
318 description = source.get('description')
319 thumbnail = url_or_none(source.get('poster'))
320 timestamp = unified_timestamp(source.get('previewStart'))
321 duration = parse_duration(source.get('length'))
322
323 series = source.get('format')
324 season_number = int_or_none(self._search_regex(
325 r'staffel-(\d+)', url, 'season number', default=None))
326 episode_number = int_or_none(self._search_regex(
327 r'episode-(\d+)', url, 'episode number', default=None))
328
329 return {
330 'id': video_id,
331 'display_id': display_id,
332 'title': title,
333 'description': description,
334 'thumbnail': thumbnail,
335 'timestamp': timestamp,
336 'duration': duration,
337 'series': series,
338 'season_number': season_number,
339 'episode_number': episode_number,
340 'episode': title,
341 'formats': formats,
342 }
343
344 def _real_extract(self, url):
345 display_id, video_id = re.match(self._VALID_URL, url).groups()
346 info = self._call_api('player/' + video_id, video_id)
347 return self._extract_video(info, video_id, display_id)
348 """
349
350
class TVNowListBaseIE(TVNowNewBaseIE):
    """Common base of the playlist-style (season / month / show) extractors."""

    # URL prefix identifying a show; subclasses append their own suffix
    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
                            [^/?#&]+-(?P<show_id>\d+)
                    )
                    '''

    @classmethod
    def suitable(cls, url):
        """Decline URLs that TVNowNewIE accepts."""
        if TVNowNewIE.suitable(url):
            return False
        return super(TVNowListBaseIE, cls).suitable(url)

    def _extract_items(self, url, show_id, list_id, query):
        """Return a playlist of the episode teasers of ``show_id`` selected by ``query``.

        Each usable item becomes a url_result handled by TVNowNewIE; the
        playlist id is "<show_id>/<list_id>".
        """
        response = self._call_api(
            'teaserrow/format/episode/' + show_id, list_id, query=query)

        entries = []
        for teaser in response['items']:
            if not isinstance(teaser, dict):
                continue
            # Item URLs are resolved against the page URL
            episode_url = urljoin(url, teaser.get('url'))
            if not episode_url:
                continue
            episode_id = str_or_none(teaser.get('id') or teaser.get('videoId'))
            episode_title = teaser.get('subheadline') or teaser.get('text')
            entries.append(self.url_result(
                episode_url, ie=TVNowNewIE.ie_key(), video_id=episode_id,
                video_title=episode_title))

        playlist_id = '%s/%s' % (show_id, list_id)
        return self.playlist_result(entries, playlist_id)
384
385
class TVNowSeasonIE(TVNowListBaseIE):
    """Playlist extractor for a single season (``.../staffel-<n>``) of a show."""

    _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
        'info_dict': {
            'id': '1815/13',
        },
        'playlist_mincount': 22,
    }]

    def _real_extract(self, url):
        """Return the playlist of episodes of the season named in ``url``."""
        match = re.match(self._VALID_URL, url)
        show_id, season_id = match.group('show_id', 'id')
        season_query = {'season': season_id}
        return self._extract_items(url, show_id, season_id, season_query)
400
401
class TVNowAnnualIE(TVNowListBaseIE):
    """Playlist extractor for one month (``.../<yyyy>-<mm>``) of a show."""

    _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
        'info_dict': {
            'id': '1669/2017-05',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Return the playlist of episodes of the year/month named in ``url``."""
        match = re.match(self._VALID_URL, url)
        show_id, year, month = match.group('show_id', 'year', 'month')
        # The list id keeps the zero-padded strings; the query uses integers
        list_id = '%s-%s' % (year, month)
        month_query = {
            'year': int(year),
            'month': int(month),
        }
        return self._extract_items(url, show_id, list_id, month_query)
419
420
class TVNowShowIE(TVNowListBaseIE):
    """Playlist extractor for a whole show; yields season or month playlists."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        # annual navigationType
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
        'info_dict': {
            'id': '1669',
        },
        'playlist_mincount': 73,
    }, {
        # season navigationType
        'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
        'info_dict': {
            'id': '11471',
        },
        'playlist_mincount': 3,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the episode, season or month extractors accept."""
        more_specific = (TVNowNewIE, TVNowSeasonIE, TVNowAnnualIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(TVNowShowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one url_result per month or season of the show.

        Raises ExtractorError when the API reports a ``navigationType`` other
        than 'annual' or 'season'.
        """
        match = re.match(self._VALID_URL, url)
        base_url, show_id = match.group('base_url', 'show_id')

        navigation_data = self._call_api(
            'teaserrow/format/navigation/' + show_id, show_id)

        nav_items = navigation_data['items']
        nav_type = navigation_data.get('navigationType')

        playlist_entries = []
        if nav_type == 'annual':
            for nav_item in nav_items:
                if not isinstance(nav_item, dict):
                    continue
                year = int_or_none(nav_item.get('year'))
                month_list = nav_item.get('months')
                if year is None or not isinstance(month_list, list):
                    continue
                for month_entry in month_list:
                    if not (isinstance(month_entry, dict) and month_entry):
                        continue
                    # The first key of each month mapping is read as the month number
                    month_number = int_or_none(next(iter(month_entry)))
                    if month_number is None:
                        continue
                    month_url = '%s/%04d-%02d' % (base_url, year, month_number)
                    playlist_entries.append(self.url_result(
                        month_url, ie=TVNowAnnualIE.ie_key()))
        elif nav_type == 'season':
            for nav_item in nav_items:
                if not isinstance(nav_item, dict):
                    continue
                season_number = int_or_none(nav_item.get('season'))
                if season_number is None:
                    continue
                season_url = '%s/staffel-%d' % (base_url, season_number)
                playlist_entries.append(self.url_result(
                    season_url, ie=TVNowSeasonIE.ie_key()))
        else:
            raise ExtractorError('Unknown navigationType')

        return self.playlist_result(playlist_entries, show_id)