Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitch.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import itertools
   5 import re
   6 import random
   7 import json
   8
   9 from .common import InfoExtractor
  10 from ..compat import (
  11     compat_kwargs,
  12     compat_parse_qs,
  13     compat_str,
  14     compat_urllib_parse_urlencode,
  15     compat_urllib_parse_urlparse,
  16 )
  17 from ..utils import (
  18     clean_html,
  19     ExtractorError,
  20     int_or_none,
  21     orderedSet,
  22     parse_duration,
  23     parse_iso8601,
  24     try_get,
  25     unified_timestamp,
  26     update_url_query,
  27     url_or_none,
  28     urljoin,
  29 )
  30
  31
  32 class TwitchBaseIE(InfoExtractor):
  33     _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
  34
  35     _API_BASE = 'https://api.twitch.tv'
  36     _USHER_BASE = 'https://usher.ttvnw.net'
  37     _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
  38     _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
  39     _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
  40     _NETRC_MACHINE = 'twitch'
  41
  42     def _handle_error(self, response):
  43         if not isinstance(response, dict):
  44             return
  45         error = response.get('error')
  46         if error:
  47             raise ExtractorError(
  48                 '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
  49                 expected=True)
  50
  51     def _call_api(self, path, item_id, *args, **kwargs):
  52         headers = kwargs.get('headers', {}).copy()
  53         headers['Client-ID'] = self._CLIENT_ID
  54         kwargs['headers'] = headers
  55         response = self._download_json(
  56             '%s/%s' % (self._API_BASE, path), item_id,
  57             *args, **compat_kwargs(kwargs))
  58         self._handle_error(response)
  59         return response
  60
  61     def _real_initialize(self):
  62         self._login()
  63
  64     def _login(self):
  65         username, password = self._get_login_info()
  66         if username is None:
  67             return
  68
  69         def fail(message):
  70             raise ExtractorError(
  71                 'Unable to login. Twitch said: %s' % message, expected=True)
  72
  73         def login_step(page, urlh, note, data):
  74             form = self._hidden_inputs(page)
  75             form.update(data)
  76
  77             page_url = urlh.geturl()
  78             post_url = self._search_regex(
  79                 r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
  80                 'post url', default=self._LOGIN_POST_URL, group='url')
  81             post_url = urljoin(page_url, post_url)
  82
  83             headers = {
  84                 'Referer': page_url,
  85                 'Origin': page_url,
  86                 'Content-Type': 'text/plain;charset=UTF-8',
  87             }
  88
  89             response = self._download_json(
  90                 post_url, None, note, data=json.dumps(form).encode(),
  91                 headers=headers, expected_status=400)
  92             error = response.get('error_description') or response.get('error_code')
  93             if error:
  94                 fail(error)
  95
  96             if 'Authenticated successfully' in response.get('message', ''):
  97                 return None, None
  98
  99             redirect_url = urljoin(
 100                 post_url,
 101                 response.get('redirect') or response['redirect_path'])
 102             return self._download_webpage_handle(
 103                 redirect_url, None, 'Downloading login redirect page',
 104                 headers=headers)
 105
 106         login_page, handle = self._download_webpage_handle(
 107             self._LOGIN_FORM_URL, None, 'Downloading login page')
 108
 109         # Some TOR nodes and public proxies are blocked completely
 110         if 'blacklist_message' in login_page:
 111             fail(clean_html(login_page))
 112
 113         redirect_page, handle = login_step(
 114             login_page, handle, 'Logging in', {
 115                 'username': username,
 116                 'password': password,
 117                 'client_id': self._CLIENT_ID,
 118             })
 119
 120         # Successful login
 121         if not redirect_page:
 122             return
 123
 124         if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
 125             # TODO: Add mechanism to request an SMS or phone call
 126             tfa_token = self._get_tfa_info('two-factor authentication token')
 127             login_step(redirect_page, handle, 'Submitting TFA token', {
 128                 'authy_token': tfa_token,
 129                 'remember_2fa': 'true',
 130             })
 131
 132     def _prefer_source(self, formats):
 133         try:
 134             source = next(f for f in formats if f['format_id'] == 'Source')
 135             source['quality'] = 10
 136         except StopIteration:
 137             for f in formats:
 138                 if '/chunked/' in f['url']:
 139                     f.update({
 140                         'quality': 10,
 141                         'format_note': 'Source',
 142                     })
 143         self._sort_formats(formats)
 144
 145
 146 class TwitchItemBaseIE(TwitchBaseIE):
 147     def _download_info(self, item, item_id):
 148         return self._extract_info(self._call_api(
 149             'kraken/videos/%s%s' % (item, item_id), item_id,
 150             'Downloading %s info JSON' % self._ITEM_TYPE))
 151
 152     def _extract_media(self, item_id):
 153         info = self._download_info(self._ITEM_SHORTCUT, item_id)
 154         response = self._call_api(
 155             'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id,
 156             'Downloading %s playlist JSON' % self._ITEM_TYPE)
 157         entries = []
 158         chunks = response['chunks']
 159         qualities = list(chunks.keys())
 160         for num, fragment in enumerate(zip(*chunks.values()), start=1):
 161             formats = []
 162             for fmt_num, fragment_fmt in enumerate(fragment):
 163                 format_id = qualities[fmt_num]
 164                 fmt = {
 165                     'url': fragment_fmt['url'],
 166                     'format_id': format_id,
 167                     'quality': 1 if format_id == 'live' else 0,
 168                 }
 169                 m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
 170                 if m:
 171                     fmt['height'] = int(m.group('height'))
 172                 formats.append(fmt)
 173             self._sort_formats(formats)
 174             entry = dict(info)
 175             entry['id'] = '%s_%d' % (entry['id'], num)
 176             entry['title'] = '%s part %d' % (entry['title'], num)
 177             entry['formats'] = formats
 178             entries.append(entry)
 179         return self.playlist_result(entries, info['id'], info['title'])
 180
 181     def _extract_info(self, info):
 182         status = info.get('status')
 183         if status == 'recording':
 184             is_live = True
 185         elif status == 'recorded':
 186             is_live = False
 187         else:
 188             is_live = None
 189         return {
 190             'id': info['_id'],
 191             'title': info.get('title') or 'Untitled Broadcast',
 192             'description': info.get('description'),
 193             'duration': int_or_none(info.get('length')),
 194             'thumbnail': info.get('preview'),
 195             'uploader': info.get('channel', {}).get('display_name'),
 196             'uploader_id': info.get('channel', {}).get('name'),
 197             'timestamp': parse_iso8601(info.get('recorded_at')),
 198             'view_count': int_or_none(info.get('views')),
 199             'is_live': is_live,
 200         }
 201
 202     def _real_extract(self, url):
 203         return self._extract_media(self._match_id(url))
 204
 205
 206 class TwitchVideoIE(TwitchItemBaseIE):
 207     IE_NAME = 'twitch:video'
 208     _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
 209     _ITEM_TYPE = 'video'
 210     _ITEM_SHORTCUT = 'a'
 211
 212     _TEST = {
 213         'url': 'http://www.twitch.tv/riotgames/b/577357806',
 214         'info_dict': {
 215             'id': 'a577357806',
 216             'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
 217         },
 218         'playlist_mincount': 12,
 219         'skip': 'HTTP Error 404: Not Found',
 220     }
 221
 222
 223 class TwitchChapterIE(TwitchItemBaseIE):
 224     IE_NAME = 'twitch:chapter'
 225     _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
 226     _ITEM_TYPE = 'chapter'
 227     _ITEM_SHORTCUT = 'c'
 228
 229     _TESTS = [{
 230         'url': 'http://www.twitch.tv/acracingleague/c/5285812',
 231         'info_dict': {
 232             'id': 'c5285812',
 233             'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
 234         },
 235         'playlist_mincount': 3,
 236         'skip': 'HTTP Error 404: Not Found',
 237     }, {
 238         'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
 239         'only_matching': True,
 240     }]
 241
 242
 243 class TwitchVodIE(TwitchItemBaseIE):
 244     IE_NAME = 'twitch:vod'
 245     _VALID_URL = r'''(?x)
 246                     https?://
 247                         (?:
 248                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
 249                             player\.twitch\.tv/\?.*?\bvideo=v?
 250                         )
 251                         (?P<id>\d+)
 252                     '''
 253     _ITEM_TYPE = 'vod'
 254     _ITEM_SHORTCUT = 'v'
 255
 256     _TESTS = [{
 257         'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
 258         'info_dict': {
 259             'id': 'v6528877',
 260             'ext': 'mp4',
 261             'title': 'LCK Summer Split - Week 6 Day 1',
 262             'thumbnail': r're:^https?://.*\.jpg$',
 263             'duration': 17208,
 264             'timestamp': 1435131709,
 265             'upload_date': '20150624',
 266             'uploader': 'Riot Games',
 267             'uploader_id': 'riotgames',
 268             'view_count': int,
 269             'start_time': 310,
 270         },
 271         'params': {
 272             # m3u8 download
 273             'skip_download': True,
 274         },
 275     }, {
 276         # Untitled broadcast (title is None)
 277         'url': 'http://www.twitch.tv/belkao_o/v/11230755',
 278         'info_dict': {
 279             'id': 'v11230755',
 280             'ext': 'mp4',
 281             'title': 'Untitled Broadcast',
 282             'thumbnail': r're:^https?://.*\.jpg$',
 283             'duration': 1638,
 284             'timestamp': 1439746708,
 285             'upload_date': '20150816',
 286             'uploader': 'BelkAO_o',
 287             'uploader_id': 'belkao_o',
 288             'view_count': int,
 289         },
 290         'params': {
 291             # m3u8 download
 292             'skip_download': True,
 293         },
 294         'skip': 'HTTP Error 404: Not Found',
 295     }, {
 296         'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
 297         'only_matching': True,
 298     }, {
 299         'url': 'https://www.twitch.tv/videos/6528877',
 300         'only_matching': True,
 301     }, {
 302         'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
 303         'only_matching': True,
 304     }, {
 305         'url': 'https://www.twitch.tv/northernlion/video/291940395',
 306         'only_matching': True,
 307     }, {
 308         'url': 'https://player.twitch.tv/?video=480452374',
 309         'only_matching': True,
 310     }]
 311
 312     def _real_extract(self, url):
 313         item_id = self._match_id(url)
 314
 315         info = self._download_info(self._ITEM_SHORTCUT, item_id)
 316         access_token = self._call_api(
 317             'api/vods/%s/access_token' % item_id, item_id,
 318             'Downloading %s access token' % self._ITEM_TYPE)
 319
 320         formats = self._extract_m3u8_formats(
 321             '%s/vod/%s.m3u8?%s' % (
 322                 self._USHER_BASE, item_id,
 323                 compat_urllib_parse_urlencode({
 324                     'allow_source': 'true',
 325                     'allow_audio_only': 'true',
 326                     'allow_spectre': 'true',
 327                     'player': 'twitchweb',
 328                     'playlist_include_framerate': 'true',
 329                     'nauth': access_token['token'],
 330                     'nauthsig': access_token['sig'],
 331                 })),
 332             item_id, 'mp4', entry_protocol='m3u8_native')
 333
 334         self._prefer_source(formats)
 335         info['formats'] = formats
 336
 337         parsed_url = compat_urllib_parse_urlparse(url)
 338         query = compat_parse_qs(parsed_url.query)
 339         if 't' in query:
 340             info['start_time'] = parse_duration(query['t'][0])
 341
 342         if info.get('timestamp') is not None:
 343             info['subtitles'] = {
 344                 'rechat': [{
 345                     'url': update_url_query(
 346                         'https://api.twitch.tv/v5/videos/%s/comments' % item_id, {
 347                             'client_id': self._CLIENT_ID,
 348                         }),
 349                     'ext': 'json',
 350                 }],
 351             }
 352
 353         return info
 354
 355
 356 class TwitchPlaylistBaseIE(TwitchBaseIE):
 357     _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d'
 358     _PAGE_LIMIT = 100
 359
 360     def _extract_playlist(self, channel_id):
 361         info = self._call_api(
 362             'kraken/channels/%s' % channel_id,
 363             channel_id, 'Downloading channel info JSON')
 364         channel_name = info.get('display_name') or info.get('name')
 365         entries = []
 366         offset = 0
 367         limit = self._PAGE_LIMIT
 368         broken_paging_detected = False
 369         counter_override = None
 370         for counter in itertools.count(1):
 371             response = self._call_api(
 372                 self._PLAYLIST_PATH % (channel_id, offset, limit),
 373                 channel_id,
 374                 'Downloading %s JSON page %s'
 375                 % (self._PLAYLIST_TYPE, counter_override or counter))
 376             page_entries = self._extract_playlist_page(response)
 377             if not page_entries:
 378                 break
 379             total = int_or_none(response.get('_total'))
 380             # Since the beginning of March 2016 twitch's paging mechanism
 381             # is completely broken on the twitch side. It simply ignores
 382             # a limit and returns the whole offset number of videos.
 383             # Working around by just requesting all videos at once.
 384             # Upd: pagination bug was fixed by twitch on 15.03.2016.
 385             if not broken_paging_detected and total and len(page_entries) > limit:
 386                 self.report_warning(
 387                     'Twitch pagination is broken on twitch side, requesting all videos at once',
 388                     channel_id)
 389                 broken_paging_detected = True
 390                 offset = total
 391                 counter_override = '(all at once)'
 392                 continue
 393             entries.extend(page_entries)
 394             if broken_paging_detected or total and len(page_entries) >= total:
 395                 break
 396             offset += limit
 397         return self.playlist_result(
 398             [self._make_url_result(entry) for entry in orderedSet(entries)],
 399             channel_id, channel_name)
 400
 401     def _make_url_result(self, url):
 402         try:
 403             video_id = 'v%s' % TwitchVodIE._match_id(url)
 404             return self.url_result(url, TwitchVodIE.ie_key(), video_id=video_id)
 405         except AssertionError:
 406             return self.url_result(url)
 407
 408     def _extract_playlist_page(self, response):
 409         videos = response.get('videos')
 410         return [video['url'] for video in videos] if videos else []
 411
 412     def _real_extract(self, url):
 413         return self._extract_playlist(self._match_id(url))
 414
 415
 416 class TwitchProfileIE(TwitchPlaylistBaseIE):
 417     IE_NAME = 'twitch:profile'
 418     _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
 419     _PLAYLIST_TYPE = 'profile'
 420
 421     _TESTS = [{
 422         'url': 'http://www.twitch.tv/vanillatv/profile',
 423         'info_dict': {
 424             'id': 'vanillatv',
 425             'title': 'VanillaTV',
 426         },
 427         'playlist_mincount': 412,
 428     }, {
 429         'url': 'http://m.twitch.tv/vanillatv/profile',
 430         'only_matching': True,
 431     }]
 432
 433
 434 class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
 435     _VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
 436     _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
 437
 438
 439 class TwitchAllVideosIE(TwitchVideosBaseIE):
 440     IE_NAME = 'twitch:videos:all'
 441     _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
 442     _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
 443     _PLAYLIST_TYPE = 'all videos'
 444
 445     _TESTS = [{
 446         'url': 'https://www.twitch.tv/spamfish/videos/all',
 447         'info_dict': {
 448             'id': 'spamfish',
 449             'title': 'Spamfish',
 450         },
 451         'playlist_mincount': 869,
 452     }, {
 453         'url': 'https://m.twitch.tv/spamfish/videos/all',
 454         'only_matching': True,
 455     }]
 456
 457
 458 class TwitchUploadsIE(TwitchVideosBaseIE):
 459     IE_NAME = 'twitch:videos:uploads'
 460     _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
 461     _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
 462     _PLAYLIST_TYPE = 'uploads'
 463
 464     _TESTS = [{
 465         'url': 'https://www.twitch.tv/spamfish/videos/uploads',
 466         'info_dict': {
 467             'id': 'spamfish',
 468             'title': 'Spamfish',
 469         },
 470         'playlist_mincount': 0,
 471     }, {
 472         'url': 'https://m.twitch.tv/spamfish/videos/uploads',
 473         'only_matching': True,
 474     }]
 475
 476
 477 class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
 478     IE_NAME = 'twitch:videos:past-broadcasts'
 479     _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
 480     _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
 481     _PLAYLIST_TYPE = 'past broadcasts'
 482
 483     _TESTS = [{
 484         'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
 485         'info_dict': {
 486             'id': 'spamfish',
 487             'title': 'Spamfish',
 488         },
 489         'playlist_mincount': 0,
 490     }, {
 491         'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts',
 492         'only_matching': True,
 493     }]
 494
 495
 496 class TwitchHighlightsIE(TwitchVideosBaseIE):
 497     IE_NAME = 'twitch:videos:highlights'
 498     _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
 499     _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
 500     _PLAYLIST_TYPE = 'highlights'
 501
 502     _TESTS = [{
 503         'url': 'https://www.twitch.tv/spamfish/videos/highlights',
 504         'info_dict': {
 505             'id': 'spamfish',
 506             'title': 'Spamfish',
 507         },
 508         'playlist_mincount': 805,
 509     }, {
 510         'url': 'https://m.twitch.tv/spamfish/videos/highlights',
 511         'only_matching': True,
 512     }]
 513
 514
 515 class TwitchStreamIE(TwitchBaseIE):
 516     IE_NAME = 'twitch:stream'
 517     _VALID_URL = r'''(?x)
 518                     https?://
 519                         (?:
 520                             (?:(?:www|go|m)\.)?twitch\.tv/|
 521                             player\.twitch\.tv/\?.*?\bchannel=
 522                         )
 523                         (?P<id>[^/#?]+)
 524                     '''
 525
 526     _TESTS = [{
 527         'url': 'http://www.twitch.tv/shroomztv',
 528         'info_dict': {
 529             'id': '12772022048',
 530             'display_id': 'shroomztv',
 531             'ext': 'mp4',
 532             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 533             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
 534             'is_live': True,
 535             'timestamp': 1421928037,
 536             'upload_date': '20150122',
 537             'uploader': 'ShroomzTV',
 538             'uploader_id': 'shroomztv',
 539             'view_count': int,
 540         },
 541         'params': {
 542             # m3u8 download
 543             'skip_download': True,
 544         },
 545     }, {
 546         'url': 'http://www.twitch.tv/miracle_doto#profile-0',
 547         'only_matching': True,
 548     }, {
 549         'url': 'https://player.twitch.tv/?channel=lotsofs',
 550         'only_matching': True,
 551     }, {
 552         'url': 'https://go.twitch.tv/food',
 553         'only_matching': True,
 554     }, {
 555         'url': 'https://m.twitch.tv/food',
 556         'only_matching': True,
 557     }]
 558
 559     @classmethod
 560     def suitable(cls, url):
 561         return (False
 562                 if any(ie.suitable(url) for ie in (
 563                     TwitchVideoIE,
 564                     TwitchChapterIE,
 565                     TwitchVodIE,
 566                     TwitchProfileIE,
 567                     TwitchAllVideosIE,
 568                     TwitchUploadsIE,
 569                     TwitchPastBroadcastsIE,
 570                     TwitchHighlightsIE,
 571                     TwitchClipsIE))
 572                 else super(TwitchStreamIE, cls).suitable(url))
 573
 574     def _real_extract(self, url):
 575         channel_id = self._match_id(url)
 576
 577         stream = self._call_api(
 578             'kraken/streams/%s?stream_type=all' % channel_id.lower(),
 579             channel_id, 'Downloading stream JSON').get('stream')
 580
 581         if not stream:
 582             raise ExtractorError('%s is offline' % channel_id, expected=True)
 583
 584         # Channel name may be typed if different case than the original channel name
 585         # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
 586         # an invalid m3u8 URL. Working around by use of original channel name from stream
 587         # JSON and fallback to lowercase if it's not available.
 588         channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
 589
 590         access_token = self._call_api(
 591             'api/channels/%s/access_token' % channel_id, channel_id,
 592             'Downloading channel access token')
 593
 594         query = {
 595             'allow_source': 'true',
 596             'allow_audio_only': 'true',
 597             'allow_spectre': 'true',
 598             'p': random.randint(1000000, 10000000),
 599             'player': 'twitchweb',
 600             'playlist_include_framerate': 'true',
 601             'segment_preference': '4',
 602             'sig': access_token['sig'].encode('utf-8'),
 603             'token': access_token['token'].encode('utf-8'),
 604         }
 605         formats = self._extract_m3u8_formats(
 606             '%s/api/channel/hls/%s.m3u8?%s'
 607             % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)),
 608             channel_id, 'mp4')
 609         self._prefer_source(formats)
 610
 611         view_count = stream.get('viewers')
 612         timestamp = parse_iso8601(stream.get('created_at'))
 613
 614         channel = stream['channel']
 615         title = self._live_title(channel.get('display_name') or channel.get('name'))
 616         description = channel.get('status')
 617
 618         thumbnails = []
 619         for thumbnail_key, thumbnail_url in stream['preview'].items():
 620             m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
 621             if not m:
 622                 continue
 623             thumbnails.append({
 624                 'url': thumbnail_url,
 625                 'width': int(m.group('width')),
 626                 'height': int(m.group('height')),
 627             })
 628
 629         return {
 630             'id': compat_str(stream['_id']),
 631             'display_id': channel_id,
 632             'title': title,
 633             'description': description,
 634             'thumbnails': thumbnails,
 635             'uploader': channel.get('display_name'),
 636             'uploader_id': channel.get('name'),
 637             'timestamp': timestamp,
 638             'view_count': view_count,
 639             'formats': formats,
 640             'is_live': True,
 641         }
 642
 643
 644 class TwitchClipsIE(TwitchBaseIE):
 645     IE_NAME = 'twitch:clips'
 646     _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
 647
 648     _TESTS = [{
 649         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
 650         'md5': '761769e1eafce0ffebfb4089cb3847cd',
 651         'info_dict': {
 652             'id': '42850523',
 653             'ext': 'mp4',
 654             'title': 'EA Play 2016 Live from the Novo Theatre',
 655             'thumbnail': r're:^https?://.*\.jpg',
 656             'timestamp': 1465767393,
 657             'upload_date': '20160612',
 658             'creator': 'EA',
 659             'uploader': 'stereotype_',
 660             'uploader_id': '43566419',
 661         },
 662     }, {
 663         # multiple formats
 664         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
 665         'only_matching': True,
 666     }, {
 667         'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
 668         'only_matching': True,
 669     }, {
 670         'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
 671         'only_matching': True,
 672     }]
 673
 674     def _real_extract(self, url):
 675         video_id = self._match_id(url)
 676
 677         clip = self._download_json(
 678             'https://gql.twitch.tv/gql', video_id, data=json.dumps({
 679                 'query': '''{
 680   clip(slug: "%s") {
 681     broadcaster {
 682       displayName
 683     }
 684     createdAt
 685     curator {
 686       displayName
 687       id
 688     }
 689     durationSeconds
 690     id
 691     tiny: thumbnailURL(width: 86, height: 45)
 692     small: thumbnailURL(width: 260, height: 147)
 693     medium: thumbnailURL(width: 480, height: 272)
 694     title
 695     videoQualities {
 696       frameRate
 697       quality
 698       sourceURL
 699     }
 700     viewCount
 701   }
 702 }''' % video_id,
 703             }).encode(), headers={
 704                 'Client-ID': self._CLIENT_ID,
 705             })['data']['clip']
 706
 707         if not clip:
 708             raise ExtractorError(
 709                 'This clip is no longer available', expected=True)
 710
 711         formats = []
 712         for option in clip.get('videoQualities', []):
 713             if not isinstance(option, dict):
 714                 continue
 715             source = url_or_none(option.get('sourceURL'))
 716             if not source:
 717                 continue
 718             formats.append({
 719                 'url': source,
 720                 'format_id': option.get('quality'),
 721                 'height': int_or_none(option.get('quality')),
 722                 'fps': int_or_none(option.get('frameRate')),
 723             })
 724         self._sort_formats(formats)
 725
 726         thumbnails = []
 727         for thumbnail_id in ('tiny', 'small', 'medium'):
 728             thumbnail_url = clip.get(thumbnail_id)
 729             if not thumbnail_url:
 730                 continue
 731             thumb = {
 732                 'id': thumbnail_id,
 733                 'url': thumbnail_url,
 734             }
 735             mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
 736             if mobj:
 737                 thumb.update({
 738                     'height': int(mobj.group(2)),
 739                     'width': int(mobj.group(1)),
 740                 })
 741             thumbnails.append(thumb)
 742
 743         return {
 744             'id': clip.get('id') or video_id,
 745             'title': clip.get('title') or video_id,
 746             'formats': formats,
 747             'duration': int_or_none(clip.get('durationSeconds')),
 748             'views': int_or_none(clip.get('viewCount')),
 749             'timestamp': unified_timestamp(clip.get('createdAt')),
 750             'thumbnails': thumbnails,
 751             'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
 752             'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
 753             'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
 754         }