Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zattoo.py
debian/control: Remove trailing whitespace at EOF.
[youtubedl] / youtube_dl / extractor / zattoo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 from uuid import uuid4
6
7 from .common import InfoExtractor
8 from ..compat import (
9 compat_HTTPError,
10 compat_str,
11 )
12 from ..utils import (
13 ExtractorError,
14 int_or_none,
15 try_get,
16 urlencode_postdata,
17 )
18
19
class ZattooBaseIE(InfoExtractor):
    """Shared session, login and format-extraction logic for the zapi endpoints.

    All requests are built from ``_HOST_URL``; credentials are looked up
    through ``_get_login_info()`` and ``_NETRC_MACHINE`` is used in the
    login-required message.
    """

    _NETRC_MACHINE = 'zattoo'
    _HOST_URL = 'https://zattoo.com'

    # Set by _login() from the login response and used by _extract_cid()
    # to build the channel list URL.
    _power_guide_hash = None

    def _login(self):
        """Log in and remember the session's power guide hash.

        Calls ``raise_login_required`` when no username or password is
        configured. An HTTP 400 answer from the login endpoint is turned
        into an expected ExtractorError about bad credentials; any other
        ExtractorError is re-raised unchanged.
        """
        username, password = self._get_login_info()
        if not username or not password:
            self.raise_login_required(
                'A valid %s account is needed to access this media.'
                % self._NETRC_MACHINE)

        try:
            data = self._download_json(
                '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
                data=urlencode_postdata({
                    'login': username,
                    'password': password,
                    'remember': 'true',
                }), headers={
                    'Referer': '%s/login' % self._HOST_URL,
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                raise ExtractorError(
                    'Unable to login: incorrect username and/or password',
                    expected=True)
            raise

        self._power_guide_hash = data['session']['power_guide_hash']

    def _real_initialize(self):
        """Read the app token from the host page, open a session, then log in."""
        webpage = self._download_webpage(
            self._HOST_URL, None, 'Downloading app token')
        app_token = self._html_search_regex(
            r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
            webpage, 'app token', group='token')
        app_version = self._html_search_regex(
            r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')

        # Will set up the appropriate cookies
        self._request_webpage(
            '%s/zapi/v2/session/hello' % self._HOST_URL, None,
            'Opening session', data=urlencode_postdata({
                'client_app_token': app_token,
                'uuid': compat_str(uuid4()),
                'lang': 'en',
                'app_version': app_version,
                'format': 'json',
            }))

        self._login()

    def _extract_cid(self, video_id, channel_name):
        """Return the cid of the channel matching ``channel_name``.

        A channel matches when its ``display_alias`` or its ``cid`` equals
        ``channel_name``; channels without a truthy ``cid`` are ignored.
        Raises ExtractorError when no channel matches.
        """
        channel_groups = self._download_json(
            '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
                                               self._power_guide_hash),
            video_id, 'Downloading channel list',
            query={'details': False})['channel_groups']
        # Walk the groups directly and stop at the first match instead of
        # flattening every group into a temporary list first.
        for chgrp in channel_groups:
            for chan in chgrp['channels']:
                cid = chan.get('cid')
                if not cid:
                    continue
                if chan.get('display_alias') == channel_name or cid == channel_name:
                    return cid
        raise ExtractorError('Could not extract channel id')

    def _extract_cid_and_video_info(self, video_id):
        """Download the program details and return ``(cid, info_dict)``.

        ``info_dict`` holds the metadata fields only; formats are added by
        the caller. The title falls back to the mandatory ``episode_title``
        key when ``title`` is missing or empty.
        """
        data = self._download_json(
            '%s/zapi/program/details' % self._HOST_URL,
            video_id,
            'Downloading video information',
            query={
                'program_id': video_id,
                'complete': True
            })

        p = data['program']
        cid = p['cid']

        info_dict = {
            'id': video_id,
            'title': p.get('title') or p['episode_title'],
            'description': p.get('description'),
            'thumbnail': p.get('image_url'),
            'creator': p.get('channel_name'),
            'episode': p.get('episode_title'),
            'episode_number': int_or_none(p.get('episode_number')),
            'season_number': int_or_none(p.get('season_number')),
            'release_year': int_or_none(p.get('year')),
            'categories': try_get(p, lambda x: x['categories'], list),
        }

        return cid, info_dict

    def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
        """Collect and sort the formats for every requested stream type.

        The watch endpoint depends on the arguments: ``live/<cid>`` when
        ``is_live`` is true, ``recording/<record_id>`` when a record id is
        given, otherwise ``recall/<cid>/<video_id>``. Each stream type is
        requested separately with ``fatal=False``, so a failed request or a
        response without watch URLs is skipped.

        Raises ExtractorError if a stream type has no matching manifest
        extractor.
        """
        postdata_common = {
            'https_watch_urls': True,
        }

        if is_live:
            postdata_common.update({'timeshift': 10800})
            url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
        elif record_id:
            url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
        else:
            url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)

        formats = []
        for stream_type in ('dash', 'hls', 'hls5', 'hds'):
            postdata = postdata_common.copy()
            postdata['stream_type'] = stream_type

            data = self._download_json(
                url, video_id, 'Downloading %s formats' % stream_type.upper(),
                data=urlencode_postdata(postdata), fatal=False)
            if not data:
                continue

            watch_urls = try_get(
                data, lambda x: x['stream']['watch_urls'], list)
            if not watch_urls:
                continue

            for watch in watch_urls:
                if not isinstance(watch, dict):
                    continue
                watch_url = watch.get('url')
                if not watch_url or not isinstance(watch_url, compat_str):
                    continue
                # Format id is "<stream_type>[-<maxrate>][-<audio_channel>]".
                format_id_list = [stream_type]
                maxrate = watch.get('maxrate')
                if maxrate:
                    format_id_list.append(compat_str(maxrate))
                audio_channel = watch.get('audio_channel')
                if audio_channel:
                    format_id_list.append(compat_str(audio_channel))
                # Only audio channel 'A' gets an explicit preference.
                preference = 1 if audio_channel == 'A' else None
                format_id = '-'.join(format_id_list)
                if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
                    this_formats = self._extract_mpd_formats(
                        watch_url, video_id, mpd_id=format_id, fatal=False)
                elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
                    this_formats = self._extract_m3u8_formats(
                        watch_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=format_id,
                        fatal=False)
                elif stream_type == 'hds':
                    this_formats = self._extract_f4m_formats(
                        watch_url, video_id, f4m_id=format_id, fatal=False)
                elif stream_type == 'smooth_playready':
                    this_formats = self._extract_ism_formats(
                        watch_url, video_id, ism_id=format_id, fatal=False)
                else:
                    # An `assert False` here would be stripped under
                    # `python -O`, letting the loop below run with an
                    # unbound or stale `this_formats`; fail explicitly.
                    raise ExtractorError(
                        'Unsupported stream type: %s' % stream_type)
                for this_format in this_formats:
                    this_format['preference'] = preference
                formats.extend(this_formats)
        self._sort_formats(formats)
        return formats

    def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
        """Build the info dict, including formats, for a live or on-demand item.

        For live streams the cid is looked up in the channel list and the
        info dict uses ``channel_name`` as id; otherwise the cid and the
        metadata come from the program details.
        """
        if is_live:
            cid = self._extract_cid(video_id, channel_name)
            info_dict = {
                'id': channel_name,
                'title': self._live_title(channel_name),
                'is_live': True,
            }
        else:
            cid, info_dict = self._extract_cid_and_video_info(video_id)
        formats = self._extract_formats(
            cid, video_id, record_id=record_id, is_live=is_live)
        info_dict['formats'] = formats
        return info_dict
201
202
class QuicklineBaseIE(ZattooBaseIE):
    """ZattooBaseIE configured for the mobiltv.quickline.com host."""

    _HOST_URL = 'https://mobiltv.quickline.com'
    _NETRC_MACHINE = 'quickline'
206
207
class QuicklineIE(QuicklineBaseIE):
    """Extractor for ``/watch/<channel>/<numeric id>`` URLs on Quickline."""

    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Pull the channel name and program id out of the URL and extract it."""
        mobj = re.match(self._VALID_URL, url)
        channel_name = mobj.group('channel')
        video_id = mobj.group('id')
        return self._extract_video(channel_name, video_id)
219
220
class QuicklineLiveIE(QuicklineBaseIE):
    """Extractor for ``/watch/<channel>`` live URLs on Quickline."""

    _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://mobiltv.quickline.com/watch/srf1',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        """Decline URLs that QuicklineIE accepts; otherwise use the inherited check."""
        if QuicklineIE.suitable(url):
            return False
        return super(QuicklineLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Use the matched URL id as both channel name and video id, live mode."""
        channel_name = self._match_id(url)
        return self._extract_video(channel_name, channel_name, is_live=True)
236
237
class ZattooIE(ZattooBaseIE):
    """Extractor for zattoo.com program URLs, with an optional recording id."""

    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'

    # Regular videos are only available for 7 days and recorded videos are
    # only available for a specific user, so detailed tests are not possible.
    _TESTS = [{
        'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
        'only_matching': True,
    }, {
        'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Split the URL into channel, program id and record id, then extract."""
        mobj = re.match(self._VALID_URL, url)
        channel_name, video_id, record_id = mobj.group('channel', 'id', 'recid')
        return self._extract_video(channel_name, video_id, record_id=record_id)
254
255
class ZattooLiveIE(ZattooBaseIE):
    """Extractor for ``/watch/<channel>`` live URLs on zattoo.com."""

    _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'

    _TEST = {
        'url': 'https://zattoo.com/watch/srf1',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        """Decline URLs that ZattooIE accepts; otherwise use the inherited check."""
        if ZattooIE.suitable(url):
            return False
        return super(ZattooLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Use the matched URL id as both channel name and video id, live mode."""
        channel_name = self._match_id(url)
        return self._extract_video(channel_name, channel_name, is_live=True)