Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvnow.py
Fix extraction from youtube.
[youtubedl] / youtube_dl / extractor / tvnow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 ExtractorError,
10 int_or_none,
11 parse_iso8601,
12 parse_duration,
13 str_or_none,
14 update_url_query,
15 urljoin,
16 )
17
18
class TVNowBaseIE(InfoExtractor):
    """Base class for extractors that talk to the api.tvnow.de v3 API."""

    # Names of the fields requested from the API for a single video.
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
        'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Fetch ``path`` from the v3 API and return the decoded JSON.

        ``video_id`` is handed through to ``_download_json`` and ``query``
        is sent as the request's query parameters.
        """
        return self._download_json(
            'https://api.tvnow.de/v3/' + path, video_id, query=query)

    def _extract_video(self, info, display_id):
        """Build the info dict for one video from its API metadata.

        ``info`` is the metadata mapping for the video; ``id`` and ``title``
        are required, every other field is optional. ``display_id`` is
        copied into the result unchanged.

        When no formats could be extracted, raises ExtractorError if the
        video is flagged as DRM protected or as not free, and reports
        geo-blocked videos through ``raise_geo_restricted``.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # Start from an empty list so that a missing/empty ``manifest`` (or
        # one whose entries are all skipped) reaches the checks below with a
        # defined value instead of failing on an unbound local.
        formats = []
        paths = []
        for manifest_url in (info.get('manifest') or {}).values():
            if not manifest_url:
                continue
            manifest_url = update_url_query(manifest_url, {'filter': ''})
            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
            # Several manifest entries may point at the same ``.ism`` path;
            # each distinct path is tried only once.
            if path in paths:
                continue
            paths.append(path)

            def url_repl(proto, suffix):
                # Rewrite the current manifest URL so that it addresses the
                # ``proto`` flavour of the stream with manifest ``suffix``.
                return re.sub(
                    r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                        r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                        '.ism/' + suffix, manifest_url))

            formats = self._extract_mpd_formats(
                url_repl('dash', '.mpd'), video_id,
                mpd_id='dash', fatal=False)
            formats.extend(self._extract_ism_formats(
                url_repl('hss', 'Manifest'),
                video_id, ism_id='mss', fatal=False))
            formats.extend(self._extract_m3u8_formats(
                url_repl('hls', '.m3u8'), video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))
            # The first manifest that yields any format wins.
            if formats:
                break
        else:
            # No manifest produced formats: explain why, if the API says so.
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                raise self.raise_geo_restricted()
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # ``or {}`` also covers an explicit null ``format`` value, which a
        # ``.get('format', {})`` default would pass through as None.
        f = info.get('format') or {}

        thumbnails = [{
            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
        }]
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
        if thumbnail:
            thumbnails.append({
                'url': thumbnail,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }
101
102
class TVNowIE(TVNowBaseIE):
    """Extractor for single episodes addressed as <station>/<show>/<episode>."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
                        (?P<show_id>[^/]+)/
                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return True only if no more specific TVNow extractor claims ``url``."""
        more_specific = (TVNowNewIE, TVNowSeasonIE, TVNowAnnualIE, TVNowShowIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(TVNowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Look the episode up by its ``<show>/<episode>`` path and extract it."""
        match = re.match(self._VALID_URL, url)
        # The display id is the show slug and episode slug joined by '/'.
        display_id = '%s/%s' % match.group('show_id', 'id')

        fields = ','.join(self._VIDEO_FIELDS)
        info = self._call_api(
            'movies/' + display_id, display_id, query={'fields': fields})

        return self._extract_video(info, display_id)
171
172
class TVNowNewIE(InfoExtractor):
    """Redirects new-style episode URLs (/shows/ or /serien/) to TVNowIE."""

    # The episode slug stops at '/', '?', '#' or '&'. Excluding '#' (rather
    # than '$', which has no special role in a URL) keeps a URL fragment from
    # being swallowed into the ``episode`` and ``id`` groups.
    _VALID_URL = r'''(?x)
                    (?P<base_url>https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/
                        (?:shows|serien))/
                        (?P<show>[^/]+)-\d+/
                        [^/]+/
                        episode-\d+-(?P<episode>[^/?\#&]+)-(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing at the equivalent old-style URL.

        The ``shows``/``serien`` path segment is replaced by ``_`` and the
        numeric suffixes of the show and episode slugs are dropped, giving
        ``<base>/_/<show>/<episode>``, which is then delegated to TVNowIE.
        """
        mobj = re.match(self._VALID_URL, url)
        base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
        show, episode = mobj.group('show', 'episode')
        return self.url_result(
            # Rewrite new URLs to the old format and use extraction via old API
            # at api.tvnow.de as a loophole for bypassing premium content checks
            '%s/%s/%s' % (base_url, show, episode),
            ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
197
198
class TVNowNewBaseIE(InfoExtractor):
    """Base class for extractors that talk to the apigw.tvnow.de module API."""

    def _call_api(self, path, video_id, query=None):
        """Fetch ``path`` from the module API and return the decoded JSON.

        ``query`` is an optional mapping of query parameters; omitting it
        sends none. Raises ExtractorError (marked as expected) when the
        response carries a truthy ``error`` entry.
        """
        # A None sentinel replaces the former ``query={}`` default so that no
        # dict object is shared between calls.
        if query is None:
            query = {}
        result = self._download_json(
            'https://apigw.tvnow.de/module/' + path, video_id, query=query)
        error = result.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        return result
208
209
210 """
211 TODO: New apigw.tvnow.de-based version of TVNowIE. Replace the old TVNowIE with it
212 once api.tvnow.de is shut down. Note that this version cannot bypass premium checks.
213 class TVNowIE(TVNowNewBaseIE):
214 _VALID_URL = r'''(?x)
215 https?://
216 (?:www\.)?tvnow\.(?:de|at|ch)/
217 (?:shows|serien)/[^/]+/
218 (?:[^/]+/)+
219 (?P<display_id>[^/?$&]+)-(?P<id>\d+)
220 '''
221
222 _TESTS = [{
223 # episode with annual navigation
224 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
225 'info_dict': {
226 'id': '331082',
227 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
228 'ext': 'mp4',
229 'title': 'Der neue Porsche 911 GT 3',
230 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
231 'thumbnail': r're:^https?://.*\.jpg$',
232 'timestamp': 1495994400,
233 'upload_date': '20170528',
234 'duration': 5283,
235 'series': 'GRIP - Das Motormagazin',
236 'season_number': 14,
237 'episode_number': 405,
238 'episode': 'Der neue Porsche 911 GT 3',
239 },
240 }, {
241 # rtl2, episode with season navigation
242 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
243 'only_matching': True,
244 }, {
245 # rtlnitro
246 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
247 'only_matching': True,
248 }, {
249 # superrtl
250 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
251 'only_matching': True,
252 }, {
253 # ntv
254 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
255 'only_matching': True,
256 }, {
257 # vox
258 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
259 'only_matching': True,
260 }, {
261 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
262 'only_matching': True,
263 }]
264
265 def _extract_video(self, info, url, display_id):
266 config = info['config']
267 source = config['source']
268
269 video_id = compat_str(info.get('id') or source['videoId'])
270 title = source['title'].strip()
271
272 paths = []
273 for manifest_url in (info.get('manifest') or {}).values():
274 if not manifest_url:
275 continue
276 manifest_url = update_url_query(manifest_url, {'filter': ''})
277 path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
278 if path in paths:
279 continue
280 paths.append(path)
281
282 def url_repl(proto, suffix):
283 return re.sub(
284 r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
285 r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
286 '.ism/' + suffix, manifest_url))
287
288 formats = self._extract_mpd_formats(
289 url_repl('dash', '.mpd'), video_id,
290 mpd_id='dash', fatal=False)
291 formats.extend(self._extract_ism_formats(
292 url_repl('hss', 'Manifest'),
293 video_id, ism_id='mss', fatal=False))
294 formats.extend(self._extract_m3u8_formats(
295 url_repl('hls', '.m3u8'), video_id, 'mp4',
296 'm3u8_native', m3u8_id='hls', fatal=False))
297 if formats:
298 break
299 else:
300 if try_get(info, lambda x: x['rights']['isDrm']):
301 raise ExtractorError(
302 'Video %s is DRM protected' % video_id, expected=True)
303 if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
304 raise self.raise_geo_restricted()
305 if not info.get('free', True):
306 raise ExtractorError(
307 'Video %s is not available for free' % video_id, expected=True)
308 self._sort_formats(formats)
309
310 description = source.get('description')
311 thumbnail = url_or_none(source.get('poster'))
312 timestamp = unified_timestamp(source.get('previewStart'))
313 duration = parse_duration(source.get('length'))
314
315 series = source.get('format')
316 season_number = int_or_none(self._search_regex(
317 r'staffel-(\d+)', url, 'season number', default=None))
318 episode_number = int_or_none(self._search_regex(
319 r'episode-(\d+)', url, 'episode number', default=None))
320
321 return {
322 'id': video_id,
323 'display_id': display_id,
324 'title': title,
325 'description': description,
326 'thumbnail': thumbnail,
327 'timestamp': timestamp,
328 'duration': duration,
329 'series': series,
330 'season_number': season_number,
331 'episode_number': episode_number,
332 'episode': title,
333 'formats': formats,
334 }
335
336 def _real_extract(self, url):
337 display_id, video_id = re.match(self._VALID_URL, url).groups()
338 info = self._call_api('player/' + video_id, video_id)
339 return self._extract_video(info, video_id, display_id)
340 """
341
342
class TVNowListBaseIE(TVNowNewBaseIE):
    """Common pieces for the show, season and annual playlist extractors."""

    # Matches a show page; subclasses append their own suffix to this pattern.
    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
                            [^/?#&]+-(?P<show_id>\d+)
                    )
                    '''

    @classmethod
    def suitable(cls, url):
        """Decline any URL that TVNowNewIE handles; otherwise defer to the parent."""
        if TVNowNewIE.suitable(url):
            return False
        return super(TVNowListBaseIE, cls).suitable(url)

    def _extract_items(self, url, show_id, list_id, query):
        """Return a playlist of the episodes the API lists for ``show_id``.

        ``query`` selects the episodes; ``list_id`` is used as the request's
        video id and as the second half of the playlist id
        ``<show_id>/<list_id>``. Items that are not dicts or that yield no
        URL are skipped.
        """
        response = self._call_api(
            'teaserrow/format/episode/' + show_id, list_id, query=query)

        entries = []
        for teaser in response['items']:
            if not isinstance(teaser, dict):
                continue
            # Item URLs are resolved against the page URL.
            episode_url = urljoin(url, teaser.get('url'))
            if episode_url:
                episode_id = str_or_none(teaser.get('id') or teaser.get('videoId'))
                episode_title = teaser.get('subheadline') or teaser.get('text')
                entries.append(self.url_result(
                    episode_url, ie=TVNowNewIE.ie_key(), video_id=episode_id,
                    video_title=episode_title))

        playlist_id = '%s/%s' % (show_id, list_id)
        return self.playlist_result(entries, playlist_id)
376
377
class TVNowSeasonIE(TVNowListBaseIE):
    """Playlist extractor for one season (``/staffel-<n>``) of a show."""

    _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
        'info_dict': {
            'id': '1815/13',
        },
        'playlist_mincount': 22,
    }]

    def _real_extract(self, url):
        """Return the playlist of episodes in the season named by ``url``."""
        match = re.match(self._VALID_URL, url)
        show_id, season_id = match.group('show_id', 'id')
        season_query = {'season': season_id}
        return self._extract_items(url, show_id, season_id, season_query)
392
393
class TVNowAnnualIE(TVNowListBaseIE):
    """Playlist extractor for one month (``/<yyyy>-<mm>``) of a show."""

    _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
        'info_dict': {
            'id': '1669/2017-05',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Return the playlist of episodes for the year and month in ``url``."""
        match = re.match(self._VALID_URL, url)
        show_id, year, month = match.group('show_id', 'year', 'month')
        # The list id keeps the zero-padded text form; the query uses integers.
        list_id = '%s-%s' % (year, month)
        period_query = {
            'year': int(year),
            'month': int(month),
        }
        return self._extract_items(url, show_id, list_id, period_query)
411
412
class TVNowShowIE(TVNowListBaseIE):
    """Playlist extractor for a whole show, split by month or by season."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        # annual navigationType
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
        'info_dict': {
            'id': '1669',
        },
        'playlist_mincount': 73,
    }, {
        # season navigationType
        'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
        'info_dict': {
            'id': '11471',
        },
        'playlist_mincount': 3,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs claimed by the episode, season or annual extractors."""
        narrower = (TVNowNewIE, TVNowSeasonIE, TVNowAnnualIE)
        if any(ie.suitable(url) for ie in narrower):
            return False
        return super(TVNowShowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one entry per month or season of the show.

        The API's ``navigationType`` decides the split: ``annual`` yields
        TVNowAnnualIE entries, ``season`` yields TVNowSeasonIE entries, and
        anything else raises ExtractorError. Malformed items are skipped.
        """
        match = re.match(self._VALID_URL, url)
        base_url, show_id = match.group('base_url', 'show_id')

        result = self._call_api(
            'teaserrow/format/navigation/' + show_id, show_id)
        items = result['items']
        navigation = result.get('navigationType')

        if navigation not in ('annual', 'season'):
            raise ExtractorError('Unknown navigationType')

        entries = []
        if navigation == 'annual':
            for item in items:
                if not isinstance(item, dict):
                    continue
                year = int_or_none(item.get('year'))
                months = item.get('months')
                if year is None or not isinstance(months, list):
                    continue
                for month_dict in months:
                    if not (isinstance(month_dict, dict) and month_dict):
                        continue
                    # Only the first key of each month mapping is used.
                    month_number = int_or_none(next(iter(month_dict)))
                    if month_number is not None:
                        entries.append(self.url_result(
                            '%s/%04d-%02d' % (base_url, year, month_number),
                            ie=TVNowAnnualIE.ie_key()))
        else:
            for item in items:
                if not isinstance(item, dict):
                    continue
                season_number = int_or_none(item.get('season'))
                if season_number is not None:
                    entries.append(self.url_result(
                        '%s/staffel-%d' % (base_url, season_number),
                        ie=TVNowSeasonIE.ie_key()))

        return self.playlist_result(entries, show_id)
476 raise ExtractorError('Unknown navigationType')
477
478 return self.playlist_result(entries, show_id)