2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  10 from ..compat 
import ( 
  14     compat_urllib_parse_urlencode
, 
  15     compat_urllib_parse_urlparse
, 
  34 class TwitchBaseIE(InfoExtractor
): 
  35     _VALID_URL_BASE 
= r
'https?://(?:(?:www|go|m)\.)?twitch\.tv' 
  37     _API_BASE 
= 'https://api.twitch.tv' 
  38     _USHER_BASE 
= 'https://usher.ttvnw.net' 
  39     _LOGIN_FORM_URL 
= 'https://www.twitch.tv/login' 
  40     _LOGIN_POST_URL 
= 'https://passport.twitch.tv/login' 
  41     _CLIENT_ID 
= 'kimne78kx3ncx6brgo4mv6wki5h1ko' 
  42     _NETRC_MACHINE 
= 'twitch' 
  44     def _handle_error(self
, response
): 
  45         if not isinstance(response
, dict): 
  47         error 
= response
.get('error') 
  50                 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')), 
  53     def _call_api(self
, path
, item_id
, *args
, **kwargs
): 
  54         headers 
= kwargs
.get('headers', {}).copy() 
  55         headers
['Client-ID'] = self
._CLIENT
_ID
 
  56         kwargs
['headers'] = headers
 
  57         response 
= self
._download
_json
( 
  58             '%s/%s' % (self
._API
_BASE
, path
), item_id
, 
  59             *args
, **compat_kwargs(kwargs
)) 
  60         self
._handle
_error
(response
) 
  63     def _real_initialize(self
): 
  67         username
, password 
= self
._get
_login
_info
() 
  73                 'Unable to login. Twitch said: %s' % message
, expected
=True) 
  75         def login_step(page
, urlh
, note
, data
): 
  76             form 
= self
._hidden
_inputs
(page
) 
  79             page_url 
= urlh
.geturl() 
  80             post_url 
= self
._search
_regex
( 
  81                 r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', page, 
  82                 'post url
', default=self._LOGIN_POST_URL, group='url
') 
  83             post_url = urljoin(page_url, post_url) 
  88                 'Content
-Type
': 'text
/plain
;charset
=UTF
-8', 
  91             response = self._download_json( 
  92                 post_url, None, note, data=json.dumps(form).encode(), 
  93                 headers=headers, expected_status=400) 
  94             error = response.get('error_description
') or response.get('error_code
') 
  98             if 'Authenticated successfully
' in response.get('message
', ''): 
 101             redirect_url = urljoin( 
 103                 response.get('redirect
') or response['redirect_path
']) 
 104             return self._download_webpage_handle( 
 105                 redirect_url, None, 'Downloading login redirect page
', 
 108         login_page, handle = self._download_webpage_handle( 
 109             self._LOGIN_FORM_URL, None, 'Downloading login page
') 
 111         # Some TOR nodes and public proxies are blocked completely 
 112         if 'blacklist_message
' in login_page: 
 113             fail(clean_html(login_page)) 
 115         redirect_page, handle = login_step( 
 116             login_page, handle, 'Logging 
in', { 
 117                 'username
': username, 
 118                 'password
': password, 
 119                 'client_id
': self._CLIENT_ID, 
 123         if not redirect_page: 
 126         if re.search(r'(?i
)<form
[^
>]+id="two-factor-submit"', redirect_page) is not None: 
 127             # TODO: Add mechanism to request an SMS or phone call 
 128             tfa_token = self._get_tfa_info('two
-factor authentication token
') 
 129             login_step(redirect_page, handle, 'Submitting TFA token
', { 
 130                 'authy_token
': tfa_token, 
 131                 'remember_2fa
': 'true
', 
 134     def _prefer_source(self, formats): 
 136             source = next(f for f in formats if f['format_id
'] == 'Source
') 
 137             source['preference
'] = 10 
 138         except StopIteration: 
 139             pass  # No Source stream present 
 140         self._sort_formats(formats) 
 143 class TwitchItemBaseIE(TwitchBaseIE): 
 144     def _download_info(self, item, item_id): 
 145         return self._extract_info(self._call_api( 
 146             'kraken
/videos
/%s%s' % (item, item_id), item_id, 
 147             'Downloading 
%s info JSON
' % self._ITEM_TYPE)) 
 149     def _extract_media(self, item_id): 
 150         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 151         response = self._call_api( 
 152             'api
/videos
/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id, 
 153             'Downloading 
%s playlist JSON
' % self._ITEM_TYPE) 
 155         chunks = response['chunks
'] 
 156         qualities = list(chunks.keys()) 
 157         for num, fragment in enumerate(zip(*chunks.values()), start=1): 
 159             for fmt_num, fragment_fmt in enumerate(fragment): 
 160                 format_id = qualities[fmt_num] 
 162                     'url
': fragment_fmt['url
'], 
 163                     'format_id
': format_id, 
 164                     'quality
': 1 if format_id == 'live
' else 0, 
 166                 m = re.search(r'^
(?P
<height
>\d
+)[Pp
]', format_id) 
 168                     fmt['height
'] = int(m.group('height
')) 
 170             self._sort_formats(formats) 
 172             entry['id'] = '%s_%d' % (entry['id'], num) 
 173             entry['title
'] = '%s part 
%d' % (entry['title
'], num) 
 174             entry['formats
'] = formats 
 175             entries.append(entry) 
 176         return self.playlist_result(entries, info['id'], info['title
']) 
 178     def _extract_info(self, info): 
 179         status = info.get('status
') 
 180         if status == 'recording
': 
 182         elif status == 'recorded
': 
 188             'title
': info.get('title
') or 'Untitled Broadcast
', 
 189             'description
': info.get('description
'), 
 190             'duration
': int_or_none(info.get('length
')), 
 191             'thumbnail
': info.get('preview
'), 
 192             'uploader
': info.get('channel
', {}).get('display_name
'), 
 193             'uploader_id
': info.get('channel
', {}).get('name
'), 
 194             'timestamp
': parse_iso8601(info.get('recorded_at
')), 
 195             'view_count
': int_or_none(info.get('views
')), 
 199     def _real_extract(self, url): 
 200         return self._extract_media(self._match_id(url)) 
 203 class TwitchVideoIE(TwitchItemBaseIE): 
 204     IE_NAME = 'twitch
:video
' 
 205     _VALID_URL = r'%s/[^
/]+/b
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 210         'url
': 'http
://www
.twitch
.tv
/riotgames
/b
/577357806', 
 213             'title
': 'Worlds Semifinals 
- Star Horn Royal Club vs
. OMG
', 
 215         'playlist_mincount
': 12, 
 216         'skip
': 'HTTP Error 
404: Not Found
', 
 220 class TwitchChapterIE(TwitchItemBaseIE): 
 221     IE_NAME = 'twitch
:chapter
' 
 222     _VALID_URL = r'%s/[^
/]+/c
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 223     _ITEM_TYPE = 'chapter
' 
 227         'url
': 'http
://www
.twitch
.tv
/acracingleague
/c
/5285812', 
 230             'title
': 'ACRL Off Season 
- Sports Cars 
@ Nordschleife
', 
 232         'playlist_mincount
': 3, 
 233         'skip
': 'HTTP Error 
404: Not Found
', 
 235         'url
': 'http
://www
.twitch
.tv
/tsm_theoddone
/c
/2349361', 
 236         'only_matching
': True, 
 240 class TwitchVodIE(TwitchItemBaseIE): 
 241     IE_NAME = 'twitch
:vod
' 
 242     _VALID_URL = r'''(?x) 
 245                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/| 
 246                             player\.twitch\.tv/\?.*?\bvideo=v 
 254         'url
': 'http
://www
.twitch
.tv
/riotgames
/v
/6528877?t
=5m10s
', 
 258             'title
': 'LCK Summer Split 
- Week 
6 Day 
1', 
 259             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 261             'timestamp
': 1435131709, 
 262             'upload_date
': '20150624', 
 263             'uploader
': 'Riot Games
', 
 264             'uploader_id
': 'riotgames
', 
 270             'skip_download
': True, 
 273         # Untitled broadcast (title is None) 
 274         'url
': 'http
://www
.twitch
.tv
/belkao_o
/v
/11230755', 
 278             'title
': 'Untitled Broadcast
', 
 279             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 281             'timestamp
': 1439746708, 
 282             'upload_date
': '20150816', 
 283             'uploader
': 'BelkAO_o
', 
 284             'uploader_id
': 'belkao_o
', 
 289             'skip_download
': True, 
 291         'skip
': 'HTTP Error 
404: Not Found
', 
 293         'url
': 'http
://player
.twitch
.tv
/?t
=5m10s
&video
=v6528877
', 
 294         'only_matching
': True, 
 296         'url
': 'https
://www
.twitch
.tv
/videos
/6528877', 
 297         'only_matching
': True, 
 299         'url
': 'https
://m
.twitch
.tv
/beagsandjam
/v
/247478721', 
 300         'only_matching
': True, 
 302         'url
': 'https
://www
.twitch
.tv
/northernlion
/video
/291940395', 
 303         'only_matching
': True, 
 306     def _real_extract(self, url): 
 307         item_id = self._match_id(url) 
 309         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 310         access_token = self._call_api( 
 311             'api
/vods
/%s/access_token
' % item_id, item_id, 
 312             'Downloading 
%s access token
' % self._ITEM_TYPE) 
 314         formats = self._extract_m3u8_formats( 
 316                 self._USHER_BASE, item_id, 
 317                 compat_urllib_parse_urlencode({ 
 318                     'allow_source
': 'true
', 
 319                     'allow_audio_only
': 'true
', 
 320                     'allow_spectre
': 'true
', 
 321                     'player
': 'twitchweb
', 
 322                     'nauth
': access_token['token
'], 
 323                     'nauthsig
': access_token['sig
'], 
 325             item_id, 'mp4
', entry_protocol='m3u8_native
') 
 327         self._prefer_source(formats) 
 328         info['formats
'] = formats 
 330         parsed_url = compat_urllib_parse_urlparse(url) 
 331         query = compat_parse_qs(parsed_url.query) 
 333             info['start_time
'] = parse_duration(query['t
'][0]) 
 335         if info.get('timestamp
') is not None: 
 336             info['subtitles
'] = { 
 338                     'url
': update_url_query( 
 339                         'https
://rechat
.twitch
.tv
/rechat
-messages
', { 
 340                             'video_id
': 'v
%s' % item_id, 
 341                             'start
': info['timestamp
'], 
 350 class TwitchPlaylistBaseIE(TwitchBaseIE): 
 351     _PLAYLIST_PATH = 'kraken
/channels
/%s/videos
/?offset
=%d&limit
=%d' 
 354     def _extract_playlist(self, channel_id): 
 355         info = self._call_api( 
 356             'kraken
/channels
/%s' % channel_id, 
 357             channel_id, 'Downloading channel info JSON
') 
 358         channel_name = info.get('display_name
') or info.get('name
') 
 361         limit = self._PAGE_LIMIT 
 362         broken_paging_detected = False 
 363         counter_override = None 
 364         for counter in itertools.count(1): 
 365             response = self._call_api( 
 366                 self._PLAYLIST_PATH % (channel_id, offset, limit), 
 368                 'Downloading 
%s JSON page 
%s' 
 369                 % (self._PLAYLIST_TYPE, counter_override or counter)) 
 370             page_entries = self._extract_playlist_page(response) 
 373             total = int_or_none(response.get('_total
')) 
 374             # Since the beginning of March 2016 twitch's paging mechanism
 
 375             # is completely broken on the twitch side. It simply ignores 
 376             # a limit and returns the whole offset number of videos. 
 377             # Working around by just requesting all videos at once. 
 378             # Upd: pagination bug was fixed by twitch on 15.03.2016. 
 379             if not broken_paging_detected 
and total 
and len(page_entries
) > limit
: 
 381                     'Twitch pagination is broken on twitch side, requesting all videos at once', 
 383                 broken_paging_detected 
= True 
 385                 counter_override 
= '(all at once)' 
 387             entries
.extend(page_entries
) 
 388             if broken_paging_detected 
or total 
and len(page_entries
) >= total
: 
 391         return self
.playlist_result( 
 392             [self
._make
_url
_result
(entry
) for entry 
in orderedSet(entries
)], 
 393             channel_id
, channel_name
) 
 395     def _make_url_result(self
, url
): 
 397             video_id 
= 'v%s' % TwitchVodIE
._match
_id
(url
) 
 398             return self
.url_result(url
, TwitchVodIE
.ie_key(), video_id
=video_id
) 
 399         except AssertionError: 
 400             return self
.url_result(url
) 
 402     def _extract_playlist_page(self
, response
): 
 403         videos 
= response
.get('videos') 
 404         return [video
['url'] for video 
in videos
] if videos 
else [] 
 406     def _real_extract(self
, url
): 
 407         return self
._extract
_playlist
(self
._match
_id
(url
)) 
 410 class TwitchProfileIE(TwitchPlaylistBaseIE
): 
 411     IE_NAME 
= 'twitch:profile' 
 412     _VALID_URL 
= r
'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
 
 413     _PLAYLIST_TYPE 
= 'profile' 
 416         'url': 'http://www.twitch.tv/vanillatv/profile', 
 419             'title': 'VanillaTV', 
 421         'playlist_mincount': 412, 
 423         'url': 'http://m.twitch.tv/vanillatv/profile', 
 424         'only_matching': True, 
 428 class TwitchVideosBaseIE(TwitchPlaylistBaseIE
): 
 429     _VALID_URL_VIDEOS_BASE 
= r
'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE
._VALID
_URL
_BASE
 
 430     _PLAYLIST_PATH 
= TwitchPlaylistBaseIE
._PLAYLIST
_PATH 
+ '&broadcast_type=' 
 433 class TwitchAllVideosIE(TwitchVideosBaseIE
): 
 434     IE_NAME 
= 'twitch:videos:all' 
 435     _VALID_URL 
= r
'%s/all' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 436     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive,upload,highlight' 
 437     _PLAYLIST_TYPE 
= 'all videos' 
 440         'url': 'https://www.twitch.tv/spamfish/videos/all', 
 445         'playlist_mincount': 869, 
 447         'url': 'https://m.twitch.tv/spamfish/videos/all', 
 448         'only_matching': True, 
 452 class TwitchUploadsIE(TwitchVideosBaseIE
): 
 453     IE_NAME 
= 'twitch:videos:uploads' 
 454     _VALID_URL 
= r
'%s/uploads' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 455     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'upload' 
 456     _PLAYLIST_TYPE 
= 'uploads' 
 459         'url': 'https://www.twitch.tv/spamfish/videos/uploads', 
 464         'playlist_mincount': 0, 
 466         'url': 'https://m.twitch.tv/spamfish/videos/uploads', 
 467         'only_matching': True, 
 471 class TwitchPastBroadcastsIE(TwitchVideosBaseIE
): 
 472     IE_NAME 
= 'twitch:videos:past-broadcasts' 
 473     _VALID_URL 
= r
'%s/past-broadcasts' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 474     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive' 
 475     _PLAYLIST_TYPE 
= 'past broadcasts' 
 478         'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', 
 483         'playlist_mincount': 0, 
 485         'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts', 
 486         'only_matching': True, 
 490 class TwitchHighlightsIE(TwitchVideosBaseIE
): 
 491     IE_NAME 
= 'twitch:videos:highlights' 
 492     _VALID_URL 
= r
'%s/highlights' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 493     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'highlight' 
 494     _PLAYLIST_TYPE 
= 'highlights' 
 497         'url': 'https://www.twitch.tv/spamfish/videos/highlights', 
 502         'playlist_mincount': 805, 
 504         'url': 'https://m.twitch.tv/spamfish/videos/highlights', 
 505         'only_matching': True, 
 509 class TwitchStreamIE(TwitchBaseIE
): 
 510     IE_NAME 
= 'twitch:stream' 
 511     _VALID_URL 
= r
'''(?x) 
 514                             (?:(?:www|go|m)\.)?twitch\.tv/| 
 515                             player\.twitch\.tv/\?.*?\bchannel= 
 521         'url': 'http://www.twitch.tv/shroomztv', 
 524             'display_id': 'shroomztv', 
 526             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
 527             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV', 
 529             'timestamp': 1421928037, 
 530             'upload_date': '20150122', 
 531             'uploader': 'ShroomzTV', 
 532             'uploader_id': 'shroomztv', 
 537             'skip_download': True, 
 540         'url': 'http://www.twitch.tv/miracle_doto#profile-0', 
 541         'only_matching': True, 
 543         'url': 'https://player.twitch.tv/?channel=lotsofs', 
 544         'only_matching': True, 
 546         'url': 'https://go.twitch.tv/food', 
 547         'only_matching': True, 
 549         'url': 'https://m.twitch.tv/food', 
 550         'only_matching': True, 
 554     def suitable(cls
, url
): 
 556                 if any(ie
.suitable(url
) for ie 
in ( 
 563                     TwitchPastBroadcastsIE
, 
 566                 else super(TwitchStreamIE
, cls
).suitable(url
)) 
 568     def _real_extract(self
, url
): 
 569         channel_id 
= self
._match
_id
(url
) 
 571         stream 
= self
._call
_api
( 
 572             'kraken/streams/%s?stream_type=all' % channel_id
, channel_id
, 
 573             'Downloading stream JSON').get('stream') 
 576             raise ExtractorError('%s is offline' % channel_id
, expected
=True) 
 578         # The channel name may be typed in a different case than the original channel name 
 579         # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON), which would lead to constructing 
 580         # an invalid m3u8 URL. Work around this by using the original channel name from the 
 581         # stream JSON, falling back to lowercase if it is not available. 
 582         channel_id 
= stream
.get('channel', {}).get('name') or channel_id
.lower() 
 584         access_token 
= self
._call
_api
( 
 585             'api/channels/%s/access_token' % channel_id
, channel_id
, 
 586             'Downloading channel access token') 
 589             'allow_source': 'true', 
 590             'allow_audio_only': 'true', 
 591             'allow_spectre': 'true', 
 592             'p': random
.randint(1000000, 10000000), 
 593             'player': 'twitchweb', 
 594             'segment_preference': '4', 
 595             'sig': access_token
['sig'].encode('utf-8'), 
 596             'token': access_token
['token'].encode('utf-8'), 
 598         formats 
= self
._extract
_m
3u8_formats
( 
 599             '%s/api/channel/hls/%s.m3u8?%s' 
 600             % (self
._USHER
_BASE
, channel_id
, compat_urllib_parse_urlencode(query
)), 
 602         self
._prefer
_source
(formats
) 
 604         view_count 
= stream
.get('viewers') 
 605         timestamp 
= parse_iso8601(stream
.get('created_at')) 
 607         channel 
= stream
['channel'] 
 608         title 
= self
._live
_title
(channel
.get('display_name') or channel
.get('name')) 
 609         description 
= channel
.get('status') 
 612         for thumbnail_key
, thumbnail_url 
in stream
['preview'].items(): 
 613             m 
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key
) 
 617                 'url': thumbnail_url
, 
 618                 'width': int(m
.group('width')), 
 619                 'height': int(m
.group('height')), 
 623             'id': compat_str(stream
['_id']), 
 624             'display_id': channel_id
, 
 626             'description': description
, 
 627             'thumbnails': thumbnails
, 
 628             'uploader': channel
.get('display_name'), 
 629             'uploader_id': channel
.get('name'), 
 630             'timestamp': timestamp
, 
 631             'view_count': view_count
, 
 637 class TwitchClipsIE(TwitchBaseIE
): 
 638     IE_NAME 
= 'twitch:clips' 
 639     _VALID_URL 
= r
'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)' 
 642         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', 
 643         'md5': '761769e1eafce0ffebfb4089cb3847cd', 
 647             'title': 'EA Play 2016 Live from the Novo Theatre', 
 648             'thumbnail': r
're:^https?://.*\.jpg', 
 649             'timestamp': 1465767393, 
 650             'upload_date': '20160612', 
 652             'uploader': 'stereotype_', 
 653             'uploader_id': '43566419', 
 657         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 
 658         'only_matching': True, 
 660         'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', 
 661         'only_matching': True, 
 664     def _real_extract(self
, url
): 
 665         video_id 
= self
._match
_id
(url
) 
 667         status 
= self
._download
_json
( 
 668             'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id
, 
 673         for option 
in status
['quality_options']: 
 674             if not isinstance(option
, dict): 
 676             source 
= url_or_none(option
.get('source')) 
 681                 'format_id': option
.get('quality'), 
 682                 'height': int_or_none(option
.get('quality')), 
 683                 'fps': int_or_none(option
.get('frame_rate')), 
 686         self
._sort
_formats
(formats
) 
 692         clip 
= self
._call
_api
( 
 693             'kraken/clips/%s' % video_id
, video_id
, fatal
=False, headers
={ 
 694                 'Accept': 'application/vnd.twitchtv.v5+json', 
 698             quality_key 
= qualities(('tiny', 'small', 'medium')) 
 700             thumbnails_dict 
= clip
.get('thumbnails') 
 701             if isinstance(thumbnails_dict
, dict): 
 702                 for thumbnail_id
, thumbnail_url 
in thumbnails_dict
.items(): 
 705                         'url': thumbnail_url
, 
 706                         'preference': quality_key(thumbnail_id
), 
 710                 'id': clip
.get('tracking_id') or video_id
, 
 711                 'title': clip
.get('title') or video_id
, 
 712                 'duration': float_or_none(clip
.get('duration')), 
 713                 'views': int_or_none(clip
.get('views')), 
 714                 'timestamp': unified_timestamp(clip
.get('created_at')), 
 715                 'thumbnails': thumbnails
, 
 716                 'creator': try_get(clip
, lambda x
: x
['broadcaster']['display_name'], compat_str
), 
 717                 'uploader': try_get(clip
, lambda x
: x
['curator']['display_name'], compat_str
), 
 718                 'uploader_id': try_get(clip
, lambda x
: x
['curator']['id'], compat_str
),