Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zattoo.py
debian/control: Mark compliance with Debian policy 4.1.5.
[youtubedl] / youtube_dl / extractor / zattoo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 from uuid import uuid4
6
7 from .common import InfoExtractor
8 from ..compat import (
9 compat_HTTPError,
10 compat_str,
11 )
12 from ..utils import (
13 ExtractorError,
14 int_or_none,
15 try_get,
16 url_or_none,
17 urlencode_postdata,
18 )
19
20
class ZattooBaseIE(InfoExtractor):
    """Base extractor for hosts exposing the Zattoo ``/zapi`` endpoints.

    Holds the session setup, login, channel lookup and format extraction
    shared by the concrete extractors. ``_HOST_URL`` prefixes every request
    made here and ``_NETRC_MACHINE`` names the account in the
    login-required message.
    """

    _NETRC_MACHINE = 'zattoo'
    _HOST_URL = 'https://zattoo.com'

    # Set by _login(); part of the channel list URL built in _extract_cid().
    _power_guide_hash = None

    def _login(self):
        """Log in and remember the session's power guide hash.

        Calls ``raise_login_required`` when no username or password is
        available. An HTTP 400 answer from the login endpoint is turned into
        an expected ExtractorError; any other ExtractorError is re-raised
        unchanged.
        """
        username, password = self._get_login_info()
        if not username or not password:
            self.raise_login_required(
                'A valid %s account is needed to access this media.'
                % self._NETRC_MACHINE)

        try:
            data = self._download_json(
                '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
                data=urlencode_postdata({
                    'login': username,
                    'password': password,
                    'remember': 'true',
                }), headers={
                    'Referer': '%s/login' % self._HOST_URL,
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                raise ExtractorError(
                    'Unable to login: incorrect username and/or password',
                    expected=True)
            raise

        self._power_guide_hash = data['session']['power_guide_hash']

    def _real_initialize(self):
        """Fetch the app token, open a session on the host and log in."""
        webpage = self._download_webpage(
            self._HOST_URL, None, 'Downloading app token')
        app_token = self._html_search_regex(
            r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
            webpage, 'app token', group='token')
        # A fixed version is used when the page does not carry one.
        app_version = self._html_search_regex(
            r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')

        # Will setup appropriate cookies
        self._request_webpage(
            '%s/zapi/v2/session/hello' % self._HOST_URL, None,
            'Opening session', data=urlencode_postdata({
                'client_app_token': app_token,
                'uuid': compat_str(uuid4()),
                'lang': 'en',
                'app_version': app_version,
                'format': 'json',
            }))

        self._login()

    def _extract_cid(self, video_id, channel_name):
        """Return the ``cid`` of the channel matching ``channel_name``.

        The channel list is downloaded and all channel groups are flattened;
        the first channel whose ``display_alias`` or ``cid`` equals
        ``channel_name`` wins. Raises ExtractorError when none matches.
        """
        channel_groups = self._download_json(
            '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
                                               self._power_guide_hash),
            video_id, 'Downloading channel list',
            query={'details': False})['channel_groups']
        channel_list = []
        for chgrp in channel_groups:
            channel_list.extend(chgrp['channels'])
        try:
            return next(
                chan['cid'] for chan in channel_list
                if chan.get('cid') and (
                    chan.get('display_alias') == channel_name or
                    chan.get('cid') == channel_name))
        except StopIteration:
            raise ExtractorError('Could not extract channel id')

    def _extract_cid_and_video_info(self, video_id):
        """Download program details and return ``(cid, info_dict)``.

        ``info_dict`` carries the metadata fields only; formats are added by
        the caller. The title falls back to the mandatory ``episode_title``
        when ``title`` is missing or empty.
        """
        data = self._download_json(
            '%s/zapi/program/details' % self._HOST_URL,
            video_id,
            'Downloading video information',
            query={
                'program_id': video_id,
                'complete': True
            })

        p = data['program']
        cid = p['cid']

        info_dict = {
            'id': video_id,
            'title': p.get('title') or p['episode_title'],
            'description': p.get('description'),
            'thumbnail': p.get('image_url'),
            'creator': p.get('channel_name'),
            'episode': p.get('episode_title'),
            'episode_number': int_or_none(p.get('episode_number')),
            'season_number': int_or_none(p.get('season_number')),
            'release_year': int_or_none(p.get('year')),
            'categories': try_get(p, lambda x: x['categories'], list),
        }

        return cid, info_dict

    def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
        """Return the sorted list of formats for a live, recorded or recall item.

        The watch endpoint is the live one when ``is_live`` is set, otherwise
        the recording one when ``record_id`` is given, otherwise the recall
        one for ``video_id`` on channel ``cid``. The endpoint is queried once
        per stream type; failed or empty answers are skipped.
        """
        postdata_common = {
            'https_watch_urls': True,
        }

        if is_live:
            postdata_common.update({'timeshift': 10800})
            url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
        elif record_id:
            url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
        else:
            url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)

        formats = []
        for stream_type in ('dash', 'hls', 'hls5', 'hds'):
            postdata = postdata_common.copy()
            postdata['stream_type'] = stream_type

            data = self._download_json(
                url, video_id, 'Downloading %s formats' % stream_type.upper(),
                data=urlencode_postdata(postdata), fatal=False)
            if not data:
                continue

            watch_urls = try_get(
                data, lambda x: x['stream']['watch_urls'], list)
            if not watch_urls:
                continue

            for watch in watch_urls:
                if not isinstance(watch, dict):
                    continue
                watch_url = url_or_none(watch.get('url'))
                if not watch_url:
                    continue
                # Format id: stream type, then max rate and audio channel
                # when the entry provides them.
                format_id_list = [stream_type]
                maxrate = watch.get('maxrate')
                if maxrate:
                    format_id_list.append(compat_str(maxrate))
                audio_channel = watch.get('audio_channel')
                if audio_channel:
                    format_id_list.append(compat_str(audio_channel))
                # Entries on audio channel 'A' get preference 1.
                preference = 1 if audio_channel == 'A' else None
                format_id = '-'.join(format_id_list)
                if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
                    this_formats = self._extract_mpd_formats(
                        watch_url, video_id, mpd_id=format_id, fatal=False)
                elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
                    this_formats = self._extract_m3u8_formats(
                        watch_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=format_id,
                        fatal=False)
                elif stream_type == 'hds':
                    this_formats = self._extract_f4m_formats(
                        watch_url, video_id, f4m_id=format_id, fatal=False)
                elif stream_type == 'smooth_playready':
                    this_formats = self._extract_ism_formats(
                        watch_url, video_id, ism_id=format_id, fatal=False)
                else:
                    # Explicit raise instead of ``assert False``: assert
                    # statements are removed under ``python -O``, which would
                    # leave this_formats unbound or stale below.
                    raise AssertionError(
                        'Unhandled stream type: %s' % stream_type)
                for this_format in this_formats:
                    this_format['preference'] = preference
                formats.extend(this_formats)
        self._sort_formats(formats)
        return formats

    def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
        """Build the info dict, including formats, for a live or on-demand item.

        For live items the channel id is looked up from ``channel_name`` and
        the info dict uses the channel name as id; otherwise channel id and
        metadata come from the program details of ``video_id``.
        """
        if is_live:
            cid = self._extract_cid(video_id, channel_name)
            info_dict = {
                'id': channel_name,
                'title': self._live_title(channel_name),
                'is_live': True,
            }
        else:
            cid, info_dict = self._extract_cid_and_video_info(video_id)
        formats = self._extract_formats(
            cid, video_id, record_id=record_id, is_live=is_live)
        info_dict['formats'] = formats
        return info_dict
202
203
class QuicklineBaseIE(ZattooBaseIE):
    """ZattooBaseIE pointed at the Quickline host and its own netrc machine."""

    _HOST_URL = 'https://mobiltv.quickline.com'
    _NETRC_MACHINE = 'quickline'
207
208
class QuicklineIE(QuicklineBaseIE):
    """Extractor for Quickline watch URLs that carry a numeric program id."""

    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
        'only_matching': True,
    }

    def _real_extract(self, url):
        # Channel name and program id are the two named groups of _VALID_URL.
        mobj = re.match(self._VALID_URL, url)
        return self._extract_video(mobj.group('channel'), mobj.group('id'))
220
221
class QuicklineLiveIE(QuicklineBaseIE):
    """Extractor for Quickline live channel URLs (channel name only)."""

    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://mobiltv.quickline.com/watch/srf1',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        # URLs accepted by QuicklineIE also match _VALID_URL here, so they
        # are declined explicitly.
        if QuicklineIE.suitable(url):
            return False
        return super(QuicklineLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        # The channel name doubles as the video id for live streams.
        channel_name = self._match_id(url)
        return self._extract_video(channel_name, channel_name, is_live=True)
237
238
class ZattooIE(ZattooBaseIE):
    """Extractor for Zattoo watch URLs with a program id and optional recording id."""

    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'

    # Detailed tests are not possible: regular videos are only available for
    # 7 days and recorded videos are only available for a specific user.
    _TESTS = [{
        'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
        'only_matching': True,
    }, {
        'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'recid' is None when the URL has no recording id segment.
        mobj = re.match(self._VALID_URL, url)
        return self._extract_video(
            mobj.group('channel'), mobj.group('id'),
            record_id=mobj.group('recid'))
255
256
class ZattooLiveIE(ZattooBaseIE):
    """Extractor for Zattoo live channel URLs (channel name only)."""

    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://zattoo.com/watch/srf1',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        # URLs accepted by ZattooIE also match _VALID_URL here, so they are
        # declined explicitly.
        if ZattooIE.suitable(url):
            return False
        return super(ZattooLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        # The channel name doubles as the video id for live streams.
        channel_name = self._match_id(url)
        return self._extract_video(channel_name, channel_name, is_live=True)