2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  14     compat_urllib_parse_urlencode
, 
  15     compat_urllib_parse_urlparse
, 
  34 class TwitchBaseIE(InfoExtractor
): 
  35     _VALID_URL_BASE 
= r
'https?://(?:(?:www|go|m)\.)?twitch\.tv' 
  37     _API_BASE 
= 'https://api.twitch.tv' 
  38     _USHER_BASE 
= 'https://usher.ttvnw.net' 
  39     _LOGIN_URL 
= 'https://www.twitch.tv/login' 
  40     _CLIENT_ID 
= 'jzkbprff40iqj646a697cyrvl0zt2m6' 
  41     _NETRC_MACHINE 
= 'twitch' 
  43     def _handle_error(self
, response
): 
  44         if not isinstance(response
, dict): 
  46         error 
= response
.get('error') 
  49                 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')), 
  52     def _call_api(self
, path
, item_id
, *args
, **kwargs
): 
  53         kwargs
.setdefault('headers', {})['Client-ID'] = self
._CLIENT
_ID
 
  54         response 
= self
._download
_json
( 
  55             '%s/%s' % (self
._API
_BASE
, path
), item_id
, 
  56             *args
, **compat_kwargs(kwargs
)) 
  57         self
._handle
_error
(response
) 
  60     def _real_initialize(self
): 
  64         username
, password 
= self
._get
_login
_info
() 
  70                 'Unable to login. Twitch said: %s' % message
, expected
=True) 
  72         def login_step(page
, urlh
, note
, data
): 
  73             form 
= self
._hidden
_inputs
(page
) 
  76             page_url 
= urlh
.geturl() 
  77             post_url 
= self
._search
_regex
( 
  78                 r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', page, 
  79                 'post url
', default=page_url, group='url
') 
  80             post_url = urljoin(page_url, post_url) 
  82             headers = {'Referer
': page_url} 
  85                 response = self._download_json( 
  87                     data=urlencode_postdata(form), 
  89             except ExtractorError as e: 
  90                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: 
  91                     response = self._parse_json( 
  92                         e.cause.read().decode('utf
-8'), None) 
  93                     fail(response.get('message
') or response['errors
'][0]) 
  96             if 'Authenticated successfully
' in response.get('message
', ''): 
  99             redirect_url = urljoin( 
 101                 response.get('redirect
') or response['redirect_path
']) 
 102             return self._download_webpage_handle( 
 103                 redirect_url, None, 'Downloading login redirect page
', 
 106         login_page, handle = self._download_webpage_handle( 
 107             self._LOGIN_URL, None, 'Downloading login page
') 
 109         # Some TOR nodes and public proxies are blocked completely 
 110         if 'blacklist_message
' in login_page: 
 111             fail(clean_html(login_page)) 
 113         redirect_page, handle = login_step( 
 114             login_page, handle, 'Logging 
in', { 
 115                 'username
': username, 
 116                 'password
': password, 
 120         if not redirect_page: 
 123         if re.search(r'(?i
)<form
[^
>]+id="two-factor-submit"', redirect_page) is not None: 
 124             # TODO: Add mechanism to request an SMS or phone call 
 125             tfa_token = self._get_tfa_info('two
-factor authentication token
') 
 126             login_step(redirect_page, handle, 'Submitting TFA token
', { 
 127                 'authy_token
': tfa_token, 
 128                 'remember_2fa
': 'true
', 
 131     def _prefer_source(self, formats): 
 133             source = next(f for f in formats if f['format_id
'] == 'Source
') 
 134             source['preference
'] = 10 
 135         except StopIteration: 
 136             pass  # No Source stream present 
 137         self._sort_formats(formats) 
 140 class TwitchItemBaseIE(TwitchBaseIE): 
 141     def _download_info(self, item, item_id): 
 142         return self._extract_info(self._call_api( 
 143             'kraken
/videos
/%s%s' % (item, item_id), item_id, 
 144             'Downloading 
%s info JSON
' % self._ITEM_TYPE)) 
 146     def _extract_media(self, item_id): 
 147         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 148         response = self._call_api( 
 149             'api
/videos
/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id, 
 150             'Downloading 
%s playlist JSON
' % self._ITEM_TYPE) 
 152         chunks = response['chunks
'] 
 153         qualities = list(chunks.keys()) 
 154         for num, fragment in enumerate(zip(*chunks.values()), start=1): 
 156             for fmt_num, fragment_fmt in enumerate(fragment): 
 157                 format_id = qualities[fmt_num] 
 159                     'url
': fragment_fmt['url
'], 
 160                     'format_id
': format_id, 
 161                     'quality
': 1 if format_id == 'live
' else 0, 
 163                 m = re.search(r'^
(?P
<height
>\d
+)[Pp
]', format_id) 
 165                     fmt['height
'] = int(m.group('height
')) 
 167             self._sort_formats(formats) 
 169             entry['id'] = '%s_%d' % (entry['id'], num) 
 170             entry['title
'] = '%s part 
%d' % (entry['title
'], num) 
 171             entry['formats
'] = formats 
 172             entries.append(entry) 
 173         return self.playlist_result(entries, info['id'], info['title
']) 
 175     def _extract_info(self, info): 
 176         status = info.get('status
') 
 177         if status == 'recording
': 
 179         elif status == 'recorded
': 
 185             'title
': info.get('title
') or 'Untitled Broadcast
', 
 186             'description
': info.get('description
'), 
 187             'duration
': int_or_none(info.get('length
')), 
 188             'thumbnail
': info.get('preview
'), 
 189             'uploader
': info.get('channel
', {}).get('display_name
'), 
 190             'uploader_id
': info.get('channel
', {}).get('name
'), 
 191             'timestamp
': parse_iso8601(info.get('recorded_at
')), 
 192             'view_count
': int_or_none(info.get('views
')), 
 196     def _real_extract(self, url): 
 197         return self._extract_media(self._match_id(url)) 
 200 class TwitchVideoIE(TwitchItemBaseIE): 
 201     IE_NAME = 'twitch
:video
' 
 202     _VALID_URL = r'%s/[^
/]+/b
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 207         'url
': 'http
://www
.twitch
.tv
/riotgames
/b
/577357806', 
 210             'title
': 'Worlds Semifinals 
- Star Horn Royal Club vs
. OMG
', 
 212         'playlist_mincount
': 12, 
 213         'skip
': 'HTTP Error 
404: Not Found
', 
 217 class TwitchChapterIE(TwitchItemBaseIE): 
 218     IE_NAME = 'twitch
:chapter
' 
 219     _VALID_URL = r'%s/[^
/]+/c
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 220     _ITEM_TYPE = 'chapter
' 
 224         'url
': 'http
://www
.twitch
.tv
/acracingleague
/c
/5285812', 
 227             'title
': 'ACRL Off Season 
- Sports Cars 
@ Nordschleife
', 
 229         'playlist_mincount
': 3, 
 230         'skip
': 'HTTP Error 
404: Not Found
', 
 232         'url
': 'http
://www
.twitch
.tv
/tsm_theoddone
/c
/2349361', 
 233         'only_matching
': True, 
 237 class TwitchVodIE(TwitchItemBaseIE): 
 238     IE_NAME = 'twitch
:vod
' 
 239     _VALID_URL = r'''(?x) 
 242                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/| 
 243                             player\.twitch\.tv/\?.*?\bvideo=v 
 251         'url
': 'http
://www
.twitch
.tv
/riotgames
/v
/6528877?t
=5m10s
', 
 255             'title
': 'LCK Summer Split 
- Week 
6 Day 
1', 
 256             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 258             'timestamp
': 1435131709, 
 259             'upload_date
': '20150624', 
 260             'uploader
': 'Riot Games
', 
 261             'uploader_id
': 'riotgames
', 
 267             'skip_download
': True, 
 270         # Untitled broadcast (title is None) 
 271         'url
': 'http
://www
.twitch
.tv
/belkao_o
/v
/11230755', 
 275             'title
': 'Untitled Broadcast
', 
 276             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 278             'timestamp
': 1439746708, 
 279             'upload_date
': '20150816', 
 280             'uploader
': 'BelkAO_o
', 
 281             'uploader_id
': 'belkao_o
', 
 286             'skip_download
': True, 
 288         'skip
': 'HTTP Error 
404: Not Found
', 
 290         'url
': 'http
://player
.twitch
.tv
/?t
=5m10s
&video
=v6528877
', 
 291         'only_matching
': True, 
 293         'url
': 'https
://www
.twitch
.tv
/videos
/6528877', 
 294         'only_matching
': True, 
 296         'url
': 'https
://m
.twitch
.tv
/beagsandjam
/v
/247478721', 
 297         'only_matching
': True, 
 300     def _real_extract(self, url): 
 301         item_id = self._match_id(url) 
 303         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 304         access_token = self._call_api( 
 305             'api
/vods
/%s/access_token
' % item_id, item_id, 
 306             'Downloading 
%s access token
' % self._ITEM_TYPE) 
 308         formats = self._extract_m3u8_formats( 
 310                 self._USHER_BASE, item_id, 
 311                 compat_urllib_parse_urlencode({ 
 312                     'allow_source
': 'true
', 
 313                     'allow_audio_only
': 'true
', 
 314                     'allow_spectre
': 'true
', 
 315                     'player
': 'twitchweb
', 
 316                     'nauth
': access_token['token
'], 
 317                     'nauthsig
': access_token['sig
'], 
 319             item_id, 'mp4
', entry_protocol='m3u8_native
') 
 321         self._prefer_source(formats) 
 322         info['formats
'] = formats 
 324         parsed_url = compat_urllib_parse_urlparse(url) 
 325         query = compat_parse_qs(parsed_url.query) 
 327             info['start_time
'] = parse_duration(query['t
'][0]) 
 329         if info.get('timestamp
') is not None: 
 330             info['subtitles
'] = { 
 332                     'url
': update_url_query( 
 333                         'https
://rechat
.twitch
.tv
/rechat
-messages
', { 
 334                             'video_id
': 'v
%s' % item_id, 
 335                             'start
': info['timestamp
'], 
 344 class TwitchPlaylistBaseIE(TwitchBaseIE): 
 345     _PLAYLIST_PATH = 'kraken
/channels
/%s/videos
/?offset
=%d&limit
=%d' 
 348     def _extract_playlist(self, channel_id): 
 349         info = self._call_api( 
 350             'kraken
/channels
/%s' % channel_id, 
 351             channel_id, 'Downloading channel info JSON
') 
 352         channel_name = info.get('display_name
') or info.get('name
') 
 355         limit = self._PAGE_LIMIT 
 356         broken_paging_detected = False 
 357         counter_override = None 
 358         for counter in itertools.count(1): 
 359             response = self._call_api( 
 360                 self._PLAYLIST_PATH % (channel_id, offset, limit), 
 362                 'Downloading 
%s JSON page 
%s' 
 363                 % (self._PLAYLIST_TYPE, counter_override or counter)) 
 364             page_entries = self._extract_playlist_page(response) 
 367             total = int_or_none(response.get('_total
')) 
 368             # Since the beginning of March 2016 twitch's paging mechanism
 
 369             # is completely broken on the twitch side. It simply ignores 
 370             # a limit and returns the whole offset number of videos. 
 371             # Working around by just requesting all videos at once. 
 372             # Upd: pagination bug was fixed by twitch on 15.03.2016. 
 373             if not broken_paging_detected 
and total 
and len(page_entries
) > limit
: 
 375                     'Twitch pagination is broken on twitch side, requesting all videos at once', 
 377                 broken_paging_detected 
= True 
 379                 counter_override 
= '(all at once)' 
 381             entries
.extend(page_entries
) 
 382             if broken_paging_detected 
or total 
and len(page_entries
) >= total
: 
 385         return self
.playlist_result( 
 386             [self
._make
_url
_result
(entry
) for entry 
in orderedSet(entries
)], 
 387             channel_id
, channel_name
) 
 389     def _make_url_result(self
, url
): 
 391             video_id 
= 'v%s' % TwitchVodIE
._match
_id
(url
) 
 392             return self
.url_result(url
, TwitchVodIE
.ie_key(), video_id
=video_id
) 
 393         except AssertionError: 
 394             return self
.url_result(url
) 
 396     def _extract_playlist_page(self
, response
): 
 397         videos 
= response
.get('videos') 
 398         return [video
['url'] for video 
in videos
] if videos 
else [] 
 400     def _real_extract(self
, url
): 
 401         return self
._extract
_playlist
(self
._match
_id
(url
)) 
 404 class TwitchProfileIE(TwitchPlaylistBaseIE
): 
 405     IE_NAME 
= 'twitch:profile' 
 406     _VALID_URL 
= r
'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
 
 407     _PLAYLIST_TYPE 
= 'profile' 
 410         'url': 'http://www.twitch.tv/vanillatv/profile', 
 413             'title': 'VanillaTV', 
 415         'playlist_mincount': 412, 
 417         'url': 'http://m.twitch.tv/vanillatv/profile', 
 418         'only_matching': True, 
 422 class TwitchVideosBaseIE(TwitchPlaylistBaseIE
): 
 423     _VALID_URL_VIDEOS_BASE 
= r
'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE
._VALID
_URL
_BASE
 
 424     _PLAYLIST_PATH 
= TwitchPlaylistBaseIE
._PLAYLIST
_PATH 
+ '&broadcast_type=' 
 427 class TwitchAllVideosIE(TwitchVideosBaseIE
): 
 428     IE_NAME 
= 'twitch:videos:all' 
 429     _VALID_URL 
= r
'%s/all' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 430     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive,upload,highlight' 
 431     _PLAYLIST_TYPE 
= 'all videos' 
 434         'url': 'https://www.twitch.tv/spamfish/videos/all', 
 439         'playlist_mincount': 869, 
 441         'url': 'https://m.twitch.tv/spamfish/videos/all', 
 442         'only_matching': True, 
 446 class TwitchUploadsIE(TwitchVideosBaseIE
): 
 447     IE_NAME 
= 'twitch:videos:uploads' 
 448     _VALID_URL 
= r
'%s/uploads' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 449     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'upload' 
 450     _PLAYLIST_TYPE 
= 'uploads' 
 453         'url': 'https://www.twitch.tv/spamfish/videos/uploads', 
 458         'playlist_mincount': 0, 
 460         'url': 'https://m.twitch.tv/spamfish/videos/uploads', 
 461         'only_matching': True, 
 465 class TwitchPastBroadcastsIE(TwitchVideosBaseIE
): 
 466     IE_NAME 
= 'twitch:videos:past-broadcasts' 
 467     _VALID_URL 
= r
'%s/past-broadcasts' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 468     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive' 
 469     _PLAYLIST_TYPE 
= 'past broadcasts' 
 472         'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', 
 477         'playlist_mincount': 0, 
 479         'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts', 
 480         'only_matching': True, 
 484 class TwitchHighlightsIE(TwitchVideosBaseIE
): 
 485     IE_NAME 
= 'twitch:videos:highlights' 
 486     _VALID_URL 
= r
'%s/highlights' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 487     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'highlight' 
 488     _PLAYLIST_TYPE 
= 'highlights' 
 491         'url': 'https://www.twitch.tv/spamfish/videos/highlights', 
 496         'playlist_mincount': 805, 
 498         'url': 'https://m.twitch.tv/spamfish/videos/highlights', 
 499         'only_matching': True, 
 503 class TwitchStreamIE(TwitchBaseIE
): 
 504     IE_NAME 
= 'twitch:stream' 
 505     _VALID_URL 
= r
'''(?x) 
 508                             (?:(?:www|go|m)\.)?twitch\.tv/| 
 509                             player\.twitch\.tv/\?.*?\bchannel= 
 515         'url': 'http://www.twitch.tv/shroomztv', 
 518             'display_id': 'shroomztv', 
 520             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
 521             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV', 
 523             'timestamp': 1421928037, 
 524             'upload_date': '20150122', 
 525             'uploader': 'ShroomzTV', 
 526             'uploader_id': 'shroomztv', 
 531             'skip_download': True, 
 534         'url': 'http://www.twitch.tv/miracle_doto#profile-0', 
 535         'only_matching': True, 
 537         'url': 'https://player.twitch.tv/?channel=lotsofs', 
 538         'only_matching': True, 
 540         'url': 'https://go.twitch.tv/food', 
 541         'only_matching': True, 
 543         'url': 'https://m.twitch.tv/food', 
 544         'only_matching': True, 
 548     def suitable(cls
, url
): 
 550                 if any(ie
.suitable(url
) for ie 
in ( 
 557                     TwitchPastBroadcastsIE
, 
 559                 else super(TwitchStreamIE
, cls
).suitable(url
)) 
 561     def _real_extract(self
, url
): 
 562         channel_id 
= self
._match
_id
(url
) 
 564         stream 
= self
._call
_api
( 
 565             'kraken/streams/%s?stream_type=all' % channel_id
, channel_id
, 
 566             'Downloading stream JSON').get('stream') 
 569             raise ExtractorError('%s is offline' % channel_id
, expected
=True) 
        # Channel name may be typed in a different case than the original channel name
        # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON), which would lead to constructing
        # an invalid m3u8 URL. Work around this by using the original channel name from
        # the stream JSON, falling back to lowercase if it is not available.
 575         channel_id 
= stream
.get('channel', {}).get('name') or channel_id
.lower() 
 577         access_token 
= self
._call
_api
( 
 578             'api/channels/%s/access_token' % channel_id
, channel_id
, 
 579             'Downloading channel access token') 
 582             'allow_source': 'true', 
 583             'allow_audio_only': 'true', 
 584             'allow_spectre': 'true', 
 585             'p': random
.randint(1000000, 10000000), 
 586             'player': 'twitchweb', 
 587             'segment_preference': '4', 
 588             'sig': access_token
['sig'].encode('utf-8'), 
 589             'token': access_token
['token'].encode('utf-8'), 
 591         formats 
= self
._extract
_m
3u8_formats
( 
 592             '%s/api/channel/hls/%s.m3u8?%s' 
 593             % (self
._USHER
_BASE
, channel_id
, compat_urllib_parse_urlencode(query
)), 
 595         self
._prefer
_source
(formats
) 
 597         view_count 
= stream
.get('viewers') 
 598         timestamp 
= parse_iso8601(stream
.get('created_at')) 
 600         channel 
= stream
['channel'] 
 601         title 
= self
._live
_title
(channel
.get('display_name') or channel
.get('name')) 
 602         description 
= channel
.get('status') 
 605         for thumbnail_key
, thumbnail_url 
in stream
['preview'].items(): 
 606             m 
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key
) 
 610                 'url': thumbnail_url
, 
 611                 'width': int(m
.group('width')), 
 612                 'height': int(m
.group('height')), 
 616             'id': compat_str(stream
['_id']), 
 617             'display_id': channel_id
, 
 619             'description': description
, 
 620             'thumbnails': thumbnails
, 
 621             'uploader': channel
.get('display_name'), 
 622             'uploader_id': channel
.get('name'), 
 623             'timestamp': timestamp
, 
 624             'view_count': view_count
, 
 630 class TwitchClipsIE(TwitchBaseIE
): 
 631     IE_NAME 
= 'twitch:clips' 
 632     _VALID_URL 
= r
'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' 
 635         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', 
 636         'md5': '761769e1eafce0ffebfb4089cb3847cd', 
 640             'title': 'EA Play 2016 Live from the Novo Theatre', 
 641             'thumbnail': r
're:^https?://.*\.jpg', 
 642             'timestamp': 1465767393, 
 643             'upload_date': '20160612', 
 645             'uploader': 'stereotype_', 
 646             'uploader_id': '43566419', 
 650         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 
 651         'only_matching': True, 
 654     def _real_extract(self
, url
): 
 655         video_id 
= self
._match
_id
(url
) 
 657         status 
= self
._download
_json
( 
 658             'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id
, 
 663         for option 
in status
['quality_options']: 
 664             if not isinstance(option
, dict): 
 666             source 
= option
.get('source') 
 667             if not source 
or not isinstance(source
, compat_str
): 
 671                 'format_id': option
.get('quality'), 
 672                 'height': int_or_none(option
.get('quality')), 
 673                 'fps': int_or_none(option
.get('frame_rate')), 
 676         self
._sort
_formats
(formats
) 
 682         clip 
= self
._call
_api
( 
 683             'kraken/clips/%s' % video_id
, video_id
, fatal
=False, headers
={ 
 684                 'Accept': 'application/vnd.twitchtv.v5+json', 
 688             quality_key 
= qualities(('tiny', 'small', 'medium')) 
 690             thumbnails_dict 
= clip
.get('thumbnails') 
 691             if isinstance(thumbnails_dict
, dict): 
 692                 for thumbnail_id
, thumbnail_url 
in thumbnails_dict
.items(): 
 695                         'url': thumbnail_url
, 
 696                         'preference': quality_key(thumbnail_id
), 
 700                 'id': clip
.get('tracking_id') or video_id
, 
 701                 'title': clip
.get('title') or video_id
, 
 702                 'duration': float_or_none(clip
.get('duration')), 
 703                 'views': int_or_none(clip
.get('views')), 
 704                 'timestamp': unified_timestamp(clip
.get('created_at')), 
 705                 'thumbnails': thumbnails
, 
 706                 'creator': try_get(clip
, lambda x
: x
['broadcaster']['display_name'], compat_str
), 
 707                 'uploader': try_get(clip
, lambda x
: x
['curator']['display_name'], compat_str
), 
 708                 'uploader_id': try_get(clip
, lambda x
: x
['curator']['id'], compat_str
),