2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  13     compat_urllib_parse_urlencode
, 
  14     compat_urllib_parse_urlparse
, 
  30 class TwitchBaseIE(InfoExtractor
): 
  31     _VALID_URL_BASE 
= r
'https?://(?:(?:www|go)\.)?twitch\.tv' 
  33     _API_BASE 
= 'https://api.twitch.tv' 
  34     _USHER_BASE 
= 'https://usher.ttvnw.net' 
  35     _LOGIN_URL 
= 'https://www.twitch.tv/login' 
  36     _CLIENT_ID 
= 'jzkbprff40iqj646a697cyrvl0zt2m6' 
  37     _NETRC_MACHINE 
= 'twitch' 
  39     def _handle_error(self
, response
): 
  40         if not isinstance(response
, dict): 
  42         error 
= response
.get('error') 
  45                 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')), 
  48     def _call_api(self
, path
, item_id
, note
): 
  49         response 
= self
._download
_json
( 
  50             '%s/%s' % (self
._API
_BASE
, path
), item_id
, note
, 
  51             headers
={'Client-ID': self
._CLIENT
_ID
}) 
  52         self
._handle
_error
(response
) 
  55     def _real_initialize(self
): 
  59         (username
, password
) = self
._get
_login
_info
() 
  65                 'Unable to login. Twitch said: %s' % message
, expected
=True) 
  67         def login_step(page
, urlh
, note
, data
): 
  68             form 
= self
._hidden
_inputs
(page
) 
  71             page_url 
= urlh
.geturl() 
  72             post_url 
= self
._search
_regex
( 
  73                 r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', page, 
  74                 'post url
', default=page_url, group='url
') 
  75             post_url = urljoin(page_url, post_url) 
  77             headers = {'Referer
': page_url} 
  80                 response = self._download_json( 
  82                     data=urlencode_postdata(form), 
  84             except ExtractorError as e: 
  85                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: 
  86                     response = self._parse_json( 
  87                         e.cause.read().decode('utf
-8'), None) 
  88                     fail(response.get('message
') or response['errors
'][0]) 
  91             if 'Authenticated successfully
' in response.get('message
', ''): 
  94             redirect_url = urljoin( 
  96                 response.get('redirect
') or response['redirect_path
']) 
  97             return self._download_webpage_handle( 
  98                 redirect_url, None, 'Downloading login redirect page
', 
 101         login_page, handle = self._download_webpage_handle( 
 102             self._LOGIN_URL, None, 'Downloading login page
') 
 104         # Some TOR nodes and public proxies are blocked completely 
 105         if 'blacklist_message
' in login_page: 
 106             fail(clean_html(login_page)) 
 108         redirect_page, handle = login_step( 
 109             login_page, handle, 'Logging 
in', { 
 110                 'username
': username, 
 111                 'password
': password, 
 115         if not redirect_page: 
 118         if re.search(r'(?i
)<form
[^
>]+id="two-factor-submit"', redirect_page) is not None: 
 119             # TODO: Add mechanism to request an SMS or phone call 
 120             tfa_token = self._get_tfa_info('two
-factor authentication token
') 
 121             login_step(redirect_page, handle, 'Submitting TFA token
', { 
 122                 'authy_token
': tfa_token, 
 123                 'remember_2fa
': 'true
', 
 126     def _prefer_source(self, formats): 
 128             source = next(f for f in formats if f['format_id
'] == 'Source
') 
 129             source['preference
'] = 10 
 130         except StopIteration: 
 131             pass  # No Source stream present 
 132         self._sort_formats(formats) 
 135 class TwitchItemBaseIE(TwitchBaseIE): 
 136     def _download_info(self, item, item_id): 
 137         return self._extract_info(self._call_api( 
 138             'kraken
/videos
/%s%s' % (item, item_id), item_id, 
 139             'Downloading 
%s info JSON
' % self._ITEM_TYPE)) 
 141     def _extract_media(self, item_id): 
 142         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 143         response = self._call_api( 
 144             'api
/videos
/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id, 
 145             'Downloading 
%s playlist JSON
' % self._ITEM_TYPE) 
 147         chunks = response['chunks
'] 
 148         qualities = list(chunks.keys()) 
 149         for num, fragment in enumerate(zip(*chunks.values()), start=1): 
 151             for fmt_num, fragment_fmt in enumerate(fragment): 
 152                 format_id = qualities[fmt_num] 
 154                     'url
': fragment_fmt['url
'], 
 155                     'format_id
': format_id, 
 156                     'quality
': 1 if format_id == 'live
' else 0, 
 158                 m = re.search(r'^
(?P
<height
>\d
+)[Pp
]', format_id) 
 160                     fmt['height
'] = int(m.group('height
')) 
 162             self._sort_formats(formats) 
 164             entry['id'] = '%s_%d' % (entry['id'], num) 
 165             entry['title
'] = '%s part 
%d' % (entry['title
'], num) 
 166             entry['formats
'] = formats 
 167             entries.append(entry) 
 168         return self.playlist_result(entries, info['id'], info['title
']) 
 170     def _extract_info(self, info): 
 173             'title
': info.get('title
') or 'Untitled Broadcast
', 
 174             'description
': info.get('description
'), 
 175             'duration
': int_or_none(info.get('length
')), 
 176             'thumbnail
': info.get('preview
'), 
 177             'uploader
': info.get('channel
', {}).get('display_name
'), 
 178             'uploader_id
': info.get('channel
', {}).get('name
'), 
 179             'timestamp
': parse_iso8601(info.get('recorded_at
')), 
 180             'view_count
': int_or_none(info.get('views
')), 
 183     def _real_extract(self, url): 
 184         return self._extract_media(self._match_id(url)) 
 187 class TwitchVideoIE(TwitchItemBaseIE): 
 188     IE_NAME = 'twitch
:video
' 
 189     _VALID_URL = r'%s/[^
/]+/b
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 194         'url
': 'http
://www
.twitch
.tv
/riotgames
/b
/577357806', 
 197             'title
': 'Worlds Semifinals 
- Star Horn Royal Club vs
. OMG
', 
 199         'playlist_mincount
': 12, 
 200         'skip
': 'HTTP Error 
404: Not Found
', 
 204 class TwitchChapterIE(TwitchItemBaseIE): 
 205     IE_NAME = 'twitch
:chapter
' 
 206     _VALID_URL = r'%s/[^
/]+/c
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 207     _ITEM_TYPE = 'chapter
' 
 211         'url
': 'http
://www
.twitch
.tv
/acracingleague
/c
/5285812', 
 214             'title
': 'ACRL Off Season 
- Sports Cars 
@ Nordschleife
', 
 216         'playlist_mincount
': 3, 
 217         'skip
': 'HTTP Error 
404: Not Found
', 
 219         'url
': 'http
://www
.twitch
.tv
/tsm_theoddone
/c
/2349361', 
 220         'only_matching
': True, 
 224 class TwitchVodIE(TwitchItemBaseIE): 
 225     IE_NAME = 'twitch
:vod
' 
 226     _VALID_URL = r'''(?x) 
 229                             (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/| 
 230                             player\.twitch\.tv/\?.*?\bvideo=v 
 238         'url
': 'http
://www
.twitch
.tv
/riotgames
/v
/6528877?t
=5m10s
', 
 242             'title
': 'LCK Summer Split 
- Week 
6 Day 
1', 
 243             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 245             'timestamp
': 1435131709, 
 246             'upload_date
': '20150624', 
 247             'uploader
': 'Riot Games
', 
 248             'uploader_id
': 'riotgames
', 
 254             'skip_download
': True, 
 257         # Untitled broadcast (title is None) 
 258         'url
': 'http
://www
.twitch
.tv
/belkao_o
/v
/11230755', 
 262             'title
': 'Untitled Broadcast
', 
 263             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 265             'timestamp
': 1439746708, 
 266             'upload_date
': '20150816', 
 267             'uploader
': 'BelkAO_o
', 
 268             'uploader_id
': 'belkao_o
', 
 273             'skip_download
': True, 
 275         'skip
': 'HTTP Error 
404: Not Found
', 
 277         'url
': 'http
://player
.twitch
.tv
/?t
=5m10s
&video
=v6528877
', 
 278         'only_matching
': True, 
 280         'url
': 'https
://www
.twitch
.tv
/videos
/6528877', 
 281         'only_matching
': True, 
 284     def _real_extract(self, url): 
 285         item_id = self._match_id(url) 
 287         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 288         access_token = self._call_api( 
 289             'api
/vods
/%s/access_token
' % item_id, item_id, 
 290             'Downloading 
%s access token
' % self._ITEM_TYPE) 
 292         formats = self._extract_m3u8_formats( 
 294                 self._USHER_BASE, item_id, 
 295                 compat_urllib_parse_urlencode({ 
 296                     'allow_source
': 'true
', 
 297                     'allow_audio_only
': 'true
', 
 298                     'allow_spectre
': 'true
', 
 299                     'player
': 'twitchweb
', 
 300                     'nauth
': access_token['token
'], 
 301                     'nauthsig
': access_token['sig
'], 
 303             item_id, 'mp4
', entry_protocol='m3u8_native
') 
 305         self._prefer_source(formats) 
 306         info['formats
'] = formats 
 308         parsed_url = compat_urllib_parse_urlparse(url) 
 309         query = compat_parse_qs(parsed_url.query) 
 311             info['start_time
'] = parse_duration(query['t
'][0]) 
 313         if info.get('timestamp
') is not None: 
 314             info['subtitles
'] = { 
 316                     'url
': update_url_query( 
 317                         'https
://rechat
.twitch
.tv
/rechat
-messages
', { 
 318                             'video_id
': 'v
%s' % item_id, 
 319                             'start
': info['timestamp
'], 
 328 class TwitchPlaylistBaseIE(TwitchBaseIE): 
 329     _PLAYLIST_PATH = 'kraken
/channels
/%s/videos
/?offset
=%d&limit
=%d' 
 332     def _extract_playlist(self, channel_id): 
 333         info = self._call_api( 
 334             'kraken
/channels
/%s' % channel_id, 
 335             channel_id, 'Downloading channel info JSON
') 
 336         channel_name = info.get('display_name
') or info.get('name
') 
 339         limit = self._PAGE_LIMIT 
 340         broken_paging_detected = False 
 341         counter_override = None 
 342         for counter in itertools.count(1): 
 343             response = self._call_api( 
 344                 self._PLAYLIST_PATH % (channel_id, offset, limit), 
 346                 'Downloading 
%s JSON page 
%s' 
 347                 % (self._PLAYLIST_TYPE, counter_override or counter)) 
 348             page_entries = self._extract_playlist_page(response) 
 351             total = int_or_none(response.get('_total
')) 
 352             # Since the beginning of March 2016 twitch's paging mechanism
 
 353             # is completely broken on the twitch side. It simply ignores 
 354             # a limit and returns the whole offset number of videos. 
 355             # Working around by just requesting all videos at once. 
 356             # Upd: pagination bug was fixed by twitch on 15.03.2016. 
 357             if not broken_paging_detected 
and total 
and len(page_entries
) > limit
: 
 359                     'Twitch pagination is broken on twitch side, requesting all videos at once', 
 361                 broken_paging_detected 
= True 
 363                 counter_override 
= '(all at once)' 
 365             entries
.extend(page_entries
) 
 366             if broken_paging_detected 
or total 
and len(page_entries
) >= total
: 
 369         return self
.playlist_result( 
 370             [self
._make
_url
_result
(entry
) for entry 
in orderedSet(entries
)], 
 371             channel_id
, channel_name
) 
 373     def _make_url_result(self
, url
): 
 375             video_id 
= 'v%s' % TwitchVodIE
._match
_id
(url
) 
 376             return self
.url_result(url
, TwitchVodIE
.ie_key(), video_id
=video_id
) 
 377         except AssertionError: 
 378             return self
.url_result(url
) 
 380     def _extract_playlist_page(self
, response
): 
 381         videos 
= response
.get('videos') 
 382         return [video
['url'] for video 
in videos
] if videos 
else [] 
 384     def _real_extract(self
, url
): 
 385         return self
._extract
_playlist
(self
._match
_id
(url
)) 
 388 class TwitchProfileIE(TwitchPlaylistBaseIE
): 
 389     IE_NAME 
= 'twitch:profile' 
 390     _VALID_URL 
= r
'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
 
 391     _PLAYLIST_TYPE 
= 'profile' 
 394         'url': 'http://www.twitch.tv/vanillatv/profile', 
 397             'title': 'VanillaTV', 
 399         'playlist_mincount': 412, 
 403 class TwitchVideosBaseIE(TwitchPlaylistBaseIE
): 
 404     _VALID_URL_VIDEOS_BASE 
= r
'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE
._VALID
_URL
_BASE
 
 405     _PLAYLIST_PATH 
= TwitchPlaylistBaseIE
._PLAYLIST
_PATH 
+ '&broadcast_type=' 
 408 class TwitchAllVideosIE(TwitchVideosBaseIE
): 
 409     IE_NAME 
= 'twitch:videos:all' 
 410     _VALID_URL 
= r
'%s/all' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 411     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive,upload,highlight' 
 412     _PLAYLIST_TYPE 
= 'all videos' 
 415         'url': 'https://www.twitch.tv/spamfish/videos/all', 
 420         'playlist_mincount': 869, 
 424 class TwitchUploadsIE(TwitchVideosBaseIE
): 
 425     IE_NAME 
= 'twitch:videos:uploads' 
 426     _VALID_URL 
= r
'%s/uploads' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 427     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'upload' 
 428     _PLAYLIST_TYPE 
= 'uploads' 
 431         'url': 'https://www.twitch.tv/spamfish/videos/uploads', 
 436         'playlist_mincount': 0, 
 440 class TwitchPastBroadcastsIE(TwitchVideosBaseIE
): 
 441     IE_NAME 
= 'twitch:videos:past-broadcasts' 
 442     _VALID_URL 
= r
'%s/past-broadcasts' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 443     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive' 
 444     _PLAYLIST_TYPE 
= 'past broadcasts' 
 447         'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', 
 452         'playlist_mincount': 0, 
 456 class TwitchHighlightsIE(TwitchVideosBaseIE
): 
 457     IE_NAME 
= 'twitch:videos:highlights' 
 458     _VALID_URL 
= r
'%s/highlights' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 459     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'highlight' 
 460     _PLAYLIST_TYPE 
= 'highlights' 
 463         'url': 'https://www.twitch.tv/spamfish/videos/highlights', 
 468         'playlist_mincount': 805, 
 472 class TwitchStreamIE(TwitchBaseIE
): 
 473     IE_NAME 
= 'twitch:stream' 
 474     _VALID_URL 
= r
'''(?x) 
 477                             (?:(?:www|go)\.)?twitch\.tv/| 
 478                             player\.twitch\.tv/\?.*?\bchannel= 
 484         'url': 'http://www.twitch.tv/shroomztv', 
 487             'display_id': 'shroomztv', 
 489             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
 490             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV', 
 492             'timestamp': 1421928037, 
 493             'upload_date': '20150122', 
 494             'uploader': 'ShroomzTV', 
 495             'uploader_id': 'shroomztv', 
 500             'skip_download': True, 
 503         'url': 'http://www.twitch.tv/miracle_doto#profile-0', 
 504         'only_matching': True, 
 506         'url': 'https://player.twitch.tv/?channel=lotsofs', 
 507         'only_matching': True, 
 509         'url': 'https://go.twitch.tv/food', 
 510         'only_matching': True, 
 514     def suitable(cls
, url
): 
 516                 if any(ie
.suitable(url
) for ie 
in ( 
 523                     TwitchPastBroadcastsIE
, 
 525                 else super(TwitchStreamIE
, cls
).suitable(url
)) 
 527     def _real_extract(self
, url
): 
 528         channel_id 
= self
._match
_id
(url
) 
 530         stream 
= self
._call
_api
( 
 531             'kraken/streams/%s?stream_type=all' % channel_id
, channel_id
, 
 532             'Downloading stream JSON').get('stream') 
 535             raise ExtractorError('%s is offline' % channel_id
, expected
=True) 
 537         # Channel name may be typed in a different case than the original channel name
 538         # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON), which would lead to constructing
 539         # an invalid m3u8 URL. Work around this by using the original channel name from the
 540         # stream JSON, falling back to lowercase if it's not available.
 541         channel_id 
= stream
.get('channel', {}).get('name') or channel_id
.lower() 
 543         access_token 
= self
._call
_api
( 
 544             'api/channels/%s/access_token' % channel_id
, channel_id
, 
 545             'Downloading channel access token') 
 548             'allow_source': 'true', 
 549             'allow_audio_only': 'true', 
 550             'allow_spectre': 'true', 
 551             'p': random
.randint(1000000, 10000000), 
 552             'player': 'twitchweb', 
 553             'segment_preference': '4', 
 554             'sig': access_token
['sig'].encode('utf-8'), 
 555             'token': access_token
['token'].encode('utf-8'), 
 557         formats 
= self
._extract
_m
3u8_formats
( 
 558             '%s/api/channel/hls/%s.m3u8?%s' 
 559             % (self
._USHER
_BASE
, channel_id
, compat_urllib_parse_urlencode(query
)), 
 561         self
._prefer
_source
(formats
) 
 563         view_count 
= stream
.get('viewers') 
 564         timestamp 
= parse_iso8601(stream
.get('created_at')) 
 566         channel 
= stream
['channel'] 
 567         title 
= self
._live
_title
(channel
.get('display_name') or channel
.get('name')) 
 568         description 
= channel
.get('status') 
 571         for thumbnail_key
, thumbnail_url 
in stream
['preview'].items(): 
 572             m 
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key
) 
 576                 'url': thumbnail_url
, 
 577                 'width': int(m
.group('width')), 
 578                 'height': int(m
.group('height')), 
 582             'id': compat_str(stream
['_id']), 
 583             'display_id': channel_id
, 
 585             'description': description
, 
 586             'thumbnails': thumbnails
, 
 587             'uploader': channel
.get('display_name'), 
 588             'uploader_id': channel
.get('name'), 
 589             'timestamp': timestamp
, 
 590             'view_count': view_count
, 
 596 class TwitchClipsIE(InfoExtractor
): 
 597     IE_NAME 
= 'twitch:clips' 
 598     _VALID_URL 
= r
'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' 
 601         'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound', 
 602         'md5': '761769e1eafce0ffebfb4089cb3847cd', 
 604             'id': 'AggressiveCobraPoooound', 
 606             'title': 'EA Play 2016 Live from the Novo Theatre', 
 607             'thumbnail': r
're:^https?://.*\.jpg', 
 609             'uploader': 'stereotype_', 
 610             'uploader_id': 'stereotype_', 
 614         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 
 615         'only_matching': True, 
 618     def _real_extract(self
, url
): 
 619         video_id 
= self
._match
_id
(url
) 
 621         webpage 
= self
._download
_webpage
(url
, video_id
) 
 623         clip 
= self
._parse
_json
( 
 625                 r
'(?s)clipInfo\s*=\s*({.+?});', webpage
, 'clip info'), 
 626             video_id
, transform_source
=js_to_json
) 
 628         title 
= clip
.get('title') or clip
.get('channel_title') or self
._og
_search
_title
(webpage
) 
 631             'url': option
['source'], 
 632             'format_id': option
.get('quality'), 
 633             'height': int_or_none(option
.get('quality')), 
 634         } for option 
in clip
.get('quality_options', []) if option
.get('source')] 
 638                 'url': clip
['clip_video_url'], 
 641         self
._sort
_formats
(formats
) 
 646             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
 647             'creator': clip
.get('broadcaster_display_name') or clip
.get('broadcaster_login'), 
 648             'uploader': clip
.get('curator_login'), 
 649             'uploader_id': clip
.get('curator_display_name'),