Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvnow.py
New upstream version 2018.04.25
[youtubedl] / youtube_dl / extractor / tvnow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 ExtractorError,
10 int_or_none,
11 parse_iso8601,
12 parse_duration,
13 try_get,
14 update_url_query,
15 )
16
17
class TVNowBaseIE(InfoExtractor):
    """Common base for the TVNOW extractors: API access and video parsing."""

    # Video fields requested from the API; subclasses send them (optionally
    # prefixed) as the comma-separated 'fields' query parameter.
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
        'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Fetch ``https://api.tvnow.de/v3/<path>`` through ``_download_json``.

        ``video_id`` and ``query`` are forwarded unchanged and the downloaded
        result is returned as is.
        """
        return self._download_json(
            'https://api.tvnow.de/v3/' + path,
            video_id, query=query)

    def _extract_video(self, info, display_id):
        """Build the result dict for one video from its API metadata.

        ``info`` is the API response describing the video; ``display_id`` is
        stored verbatim in the result.

        Raises ExtractorError when no DASH manifest URL is available: the
        video is DRM protected, geo blocked, not free, or the URL is missing
        for an unknown reason.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # 'manifest' may be absent or null in the response; go through
        # try_get so that case reaches the diagnostics below instead of
        # failing with a bare KeyError/TypeError on the subscript.
        mpd_url = try_get(info, lambda x: x['manifest']['dashclear'])
        if not mpd_url:
            if info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                raise ExtractorError(
                    'Video %s is not available from your location due to geo restriction' % video_id,
                    expected=True)
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
            # None of the known causes applies.  Stop here with a clear
            # message rather than handing an empty URL to update_url_query.
            raise ExtractorError(
                'Unable to find manifest URL for video %s' % video_id)

        mpd_url = update_url_query(mpd_url, {'filter': ''})
        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
        # The Smooth Streaming and HLS manifest URLs are derived from the
        # DASH one by swapping the host prefix and the manifest file name.
        formats.extend(self._extract_ism_formats(
            mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
            video_id, ism_id='mss', fatal=False))
        formats.extend(self._extract_m3u8_formats(
            mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
            video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # 'format' can be present but null; fall back to an empty dict so the
        # lookups below keep working.
        f = info.get('format') or {}

        # The first thumbnail URL is built from the video id alone; the image
        # advertised by the show metadata is appended when there is one.
        thumbnails = [{
            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
        }]
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
        if thumbnail:
            thumbnails.append({
                'url': thumbnail,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }
86
87
class TVNowIE(TVNowBaseIE):
    """Extractor for a single TVNOW episode page."""

    # <station>/<show_id>/<id>; 'list' and 'jahr' are excluded as the last
    # path component because they denote season listings, not episodes.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
                        (?P<show_id>[^/]+)/
                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the episode up as ``movies/<show_id>/<id>`` and parse it."""
        mobj = re.match(self._VALID_URL, url)
        # The pattern has exactly two groups, show_id and id, in that order.
        display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))

        api_path = 'movies/' + display_id
        api_query = {
            'fields': ','.join(self._VIDEO_FIELDS),
        }
        video_info = self._call_api(api_path, display_id, query=api_query)

        return self._extract_video(video_info, display_id)
151
152
class TVNowListBaseIE(TVNowBaseIE):
    """Base for the show and season list extractors.

    Subclasses are expected to define ``_SHOW_FIELDS``, ``_SEASON_FIELDS``
    and ``_VIDEO_FIELDS``; all three are read by ``_extract_list_info``.
    """

    # Matches <station>/<show_id>; the whole matched prefix is captured as
    # 'base_url' so subclasses can build child URLs from it.
    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
                            (?P<show_id>[^/]+)
                    )
                    '''

    def _extract_list_info(self, display_id, show_id):
        """Query ``formats/seo`` for the show named ``<show_id>.php``.

        The requested field list is the show fields, followed by the season
        fields under ``formatTabs.`` and the video fields under
        ``formatTabs.formatTabPages.container.movies.``.
        """
        season_fields = [
            'formatTabs.%s' % name for name in self._SEASON_FIELDS]
        video_fields = [
            'formatTabs.formatTabPages.container.movies.%s' % name
            for name in self._VIDEO_FIELDS]
        requested = list(self._SHOW_FIELDS) + season_fields + video_fields

        api_query = {
            'fields': ','.join(requested),
            'name': show_id + '.php'
        }
        return self._call_api('formats/seo', display_id, query=api_query)
173
174
class TVNowListIE(TVNowListBaseIE):
    """Extractor for one season/year listing of a TVNOW show."""

    # Show URL followed by /list/<id> or /jahr/<id>; <id> may contain slashes
    # (e.g. jahr/2018/3).
    _VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL

    _SHOW_FIELDS = ('title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
        'info_dict': {
            'id': '28296',
            'title': '30 Minuten Deutschland - Aktuell',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that TVNowIE already accepts."""
        return (False if TVNowIE.suitable(url)
                else super(TVNowListIE, cls).suitable(url))

    def _real_extract(self, url):
        """Return a playlist with one TVNowIE entry per video of the season.

        Raises ExtractorError when the show has no season whose
        ``seoheadline`` equals the id taken from the URL.
        """
        base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()

        list_info = self._extract_list_info(season_id, show_id)

        # Give next() a default: without one an unknown season escapes as a
        # bare StopIteration instead of a meaningful extractor error.
        season = next(
            (season for season in list_info['formatTabs']['items']
             if season.get('seoheadline') == season_id),
            None)
        if season is None:
            raise ExtractorError('Unable to find season %s' % season_id)

        # Playlist title is "<show> - <season>" when both parts are known,
        # otherwise whichever of the two is available.
        title = list_info.get('title')
        headline = season.get('headline')
        if title and headline:
            title = '%s - %s' % (title, headline)
        else:
            title = headline or title

        entries = []
        for container in season['formatTabPages']['items']:
            # Pages lacking a movie list contribute no entries.
            items = try_get(
                container, lambda x: x['container']['movies']['items'],
                list) or []
            for info in items:
                seo_url = info.get('seoUrl')
                if not seo_url:
                    continue
                video_id = info.get('id')
                entries.append(self.url_result(
                    '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
                    compat_str(video_id) if video_id else None))

        return self.playlist_result(
            entries, compat_str(season.get('id') or season_id), title)
234
235
class TVNowShowIE(TVNowListBaseIE):
    """Extractor for a whole TVNOW show: a playlist of its season lists."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL

    _SHOW_FIELDS = ('id', 'title', )
    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
    # No per-video fields are requested at show level.
    _VIDEO_FIELDS = ()

    _TESTS = [{
        'url': 'https://www.tvnow.at/vox/ab-ins-beet',
        'info_dict': {
            'id': 'ab-ins-beet',
            'title': 'Ab ins Beet!',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that TVNowIE or TVNowListIE already accept."""
        if TVNowIE.suitable(url) or TVNowListIE.suitable(url):
            return False
        return super(TVNowShowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one TVNowListIE entry per season tab."""
        mobj = re.match(self._VALID_URL, url)
        base_url, show_id = mobj.groups()

        show_info = self._extract_list_info(show_id, show_id)

        entries = []
        for tab in show_info['formatTabs']['items']:
            seo_headline = tab.get('seoheadline')
            # A tab without 'seoheadline' cannot be turned into a list URL.
            if seo_headline:
                tab_id = tab.get('id')
                entries.append(self.url_result(
                    '%s/list/%s' % (base_url, seo_headline),
                    TVNowListIE.ie_key(),
                    compat_str(tab_id) if tab_id else None,
                    tab.get('headline')))

        return self.playlist_result(entries, show_id, show_info.get('title'))