Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/zattoo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 from uuid import uuid4
   6
   7 from .common import InfoExtractor
   8 from ..compat import (
   9     compat_HTTPError,
  10     compat_str,
  11 )
  12 from ..utils import (
  13     ExtractorError,
  14     int_or_none,
  15     try_get,
  16     url_or_none,
  17     urlencode_postdata,
  18 )
  19
  20
  21 class ZattooBaseIE(InfoExtractor):
  22     _NETRC_MACHINE = 'zattoo'
  23     _HOST_URL = 'https://zattoo.com'
  24
  25     _power_guide_hash = None
  26
  27     def _login(self):
  28         username, password = self._get_login_info()
  29         if not username or not password:
  30             self.raise_login_required(
  31                 'A valid %s account is needed to access this media.'
  32                 % self._NETRC_MACHINE)
  33
  34         try:
  35             data = self._download_json(
  36                 '%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
  37                 data=urlencode_postdata({
  38                     'login': username,
  39                     'password': password,
  40                     'remember': 'true',
  41                 }), headers={
  42                     'Referer': '%s/login' % self._HOST_URL,
  43                     'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  44                 })
  45         except ExtractorError as e:
  46             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  47                 raise ExtractorError(
  48                     'Unable to login: incorrect username and/or password',
  49                     expected=True)
  50             raise
  51
  52         self._power_guide_hash = data['session']['power_guide_hash']
  53
  54     def _real_initialize(self):
  55         webpage = self._download_webpage(
  56             self._HOST_URL, None, 'Downloading app token')
  57         app_token = self._html_search_regex(
  58             r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
  59             webpage, 'app token', group='token')
  60         app_version = self._html_search_regex(
  61             r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')
  62
  63         # Will setup appropriate cookies
  64         self._request_webpage(
  65             '%s/zapi/v2/session/hello' % self._HOST_URL, None,
  66             'Opening session', data=urlencode_postdata({
  67                 'client_app_token': app_token,
  68                 'uuid': compat_str(uuid4()),
  69                 'lang': 'en',
  70                 'app_version': app_version,
  71                 'format': 'json',
  72             }))
  73
  74         self._login()
  75
  76     def _extract_cid(self, video_id, channel_name):
  77         channel_groups = self._download_json(
  78             '%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
  79                                                self._power_guide_hash),
  80             video_id, 'Downloading channel list',
  81             query={'details': False})['channel_groups']
  82         channel_list = []
  83         for chgrp in channel_groups:
  84             channel_list.extend(chgrp['channels'])
  85         try:
  86             return next(
  87                 chan['cid'] for chan in channel_list
  88                 if chan.get('cid') and (
  89                     chan.get('display_alias') == channel_name or
  90                     chan.get('cid') == channel_name))
  91         except StopIteration:
  92             raise ExtractorError('Could not extract channel id')
  93
  94     def _extract_cid_and_video_info(self, video_id):
  95         data = self._download_json(
  96             '%s/zapi/program/details' % self._HOST_URL,
  97             video_id,
  98             'Downloading video information',
  99             query={
 100                 'program_id': video_id,
 101                 'complete': True
 102             })
 103
 104         p = data['program']
 105         cid = p['cid']
 106
 107         info_dict = {
 108             'id': video_id,
 109             'title': p.get('title') or p['episode_title'],
 110             'description': p.get('description'),
 111             'thumbnail': p.get('image_url'),
 112             'creator': p.get('channel_name'),
 113             'episode': p.get('episode_title'),
 114             'episode_number': int_or_none(p.get('episode_number')),
 115             'season_number': int_or_none(p.get('season_number')),
 116             'release_year': int_or_none(p.get('year')),
 117             'categories': try_get(p, lambda x: x['categories'], list),
 118         }
 119
 120         return cid, info_dict
 121
 122     def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
 123         postdata_common = {
 124             'https_watch_urls': True,
 125         }
 126
 127         if is_live:
 128             postdata_common.update({'timeshift': 10800})
 129             url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
 130         elif record_id:
 131             url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
 132         else:
 133             url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
 134
 135         formats = []
 136         for stream_type in ('dash', 'hls', 'hls5', 'hds'):
 137             postdata = postdata_common.copy()
 138             postdata['stream_type'] = stream_type
 139
 140             data = self._download_json(
 141                 url, video_id, 'Downloading %s formats' % stream_type.upper(),
 142                 data=urlencode_postdata(postdata), fatal=False)
 143             if not data:
 144                 continue
 145
 146             watch_urls = try_get(
 147                 data, lambda x: x['stream']['watch_urls'], list)
 148             if not watch_urls:
 149                 continue
 150
 151             for watch in watch_urls:
 152                 if not isinstance(watch, dict):
 153                     continue
 154                 watch_url = url_or_none(watch.get('url'))
 155                 if not watch_url:
 156                     continue
 157                 format_id_list = [stream_type]
 158                 maxrate = watch.get('maxrate')
 159                 if maxrate:
 160                     format_id_list.append(compat_str(maxrate))
 161                 audio_channel = watch.get('audio_channel')
 162                 if audio_channel:
 163                     format_id_list.append(compat_str(audio_channel))
 164                 preference = 1 if audio_channel == 'A' else None
 165                 format_id = '-'.join(format_id_list)
 166                 if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
 167                     this_formats = self._extract_mpd_formats(
 168                         watch_url, video_id, mpd_id=format_id, fatal=False)
 169                 elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
 170                     this_formats = self._extract_m3u8_formats(
 171                         watch_url, video_id, 'mp4',
 172                         entry_protocol='m3u8_native', m3u8_id=format_id,
 173                         fatal=False)
 174                 elif stream_type == 'hds':
 175                     this_formats = self._extract_f4m_formats(
 176                         watch_url, video_id, f4m_id=format_id, fatal=False)
 177                 elif stream_type == 'smooth_playready':
 178                     this_formats = self._extract_ism_formats(
 179                         watch_url, video_id, ism_id=format_id, fatal=False)
 180                 else:
 181                     assert False
 182                 for this_format in this_formats:
 183                     this_format['preference'] = preference
 184                 formats.extend(this_formats)
 185         self._sort_formats(formats)
 186         return formats
 187
 188     def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
 189         if is_live:
 190             cid = self._extract_cid(video_id, channel_name)
 191             info_dict = {
 192                 'id': channel_name,
 193                 'title': self._live_title(channel_name),
 194                 'is_live': True,
 195             }
 196         else:
 197             cid, info_dict = self._extract_cid_and_video_info(video_id)
 198         formats = self._extract_formats(
 199             cid, video_id, record_id=record_id, is_live=is_live)
 200         info_dict['formats'] = formats
 201         return info_dict
 202
 203
 204 class QuicklineBaseIE(ZattooBaseIE):
 205     _NETRC_MACHINE = 'quickline'
 206     _HOST_URL = 'https://mobiltv.quickline.com'
 207
 208
 209 class QuicklineIE(QuicklineBaseIE):
 210     _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'
 211
 212     _TEST = {
 213         'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
 214         'only_matching': True,
 215     }
 216
 217     def _real_extract(self, url):
 218         channel_name, video_id = re.match(self._VALID_URL, url).groups()
 219         return self._extract_video(channel_name, video_id)
 220
 221
 222 class QuicklineLiveIE(QuicklineBaseIE):
 223     _VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'
 224
 225     _TEST = {
 226         'url': 'https://mobiltv.quickline.com/watch/srf1',
 227         'only_matching': True,
 228     }
 229
 230     @classmethod
 231     def suitable(cls, url):
 232         return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)
 233
 234     def _real_extract(self, url):
 235         channel_name = video_id = self._match_id(url)
 236         return self._extract_video(channel_name, video_id, is_live=True)
 237
 238
 239 class ZattooIE(ZattooBaseIE):
 240     _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
 241
 242     # Since regular videos are only available for 7 days and recorded videos
 243     # are only available for a specific user, we cannot have detailed tests.
 244     _TESTS = [{
 245         'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
 246         'only_matching': True,
 247     }, {
 248         'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
 249         'only_matching': True,
 250     }]
 251
 252     def _real_extract(self, url):
 253         channel_name, video_id, record_id = re.match(self._VALID_URL, url).groups()
 254         return self._extract_video(channel_name, video_id, record_id)
 255
 256
 257 class ZattooLiveIE(ZattooBaseIE):
 258     _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'
 259
 260     _TEST = {
 261         'url': 'https://zattoo.com/watch/srf1',
 262         'only_matching': True,
 263     }
 264
 265     @classmethod
 266     def suitable(cls, url):
 267         return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)
 268
 269     def _real_extract(self, url):
 270         channel_name = video_id = self._match_id(url)
 271         return self._extract_video(channel_name, video_id, is_live=True)