2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  10 from ..compat 
import ( 
  14     compat_urllib_parse_urlencode
, 
  15     compat_urllib_parse_urlparse
, 
  34 class TwitchBaseIE(InfoExtractor
): 
  35     _VALID_URL_BASE 
= r
'https?://(?:(?:www|go|m)\.)?twitch\.tv' 
  37     _API_BASE 
= 'https://api.twitch.tv' 
  38     _USHER_BASE 
= 'https://usher.ttvnw.net' 
  39     _LOGIN_FORM_URL 
= 'https://www.twitch.tv/login' 
  40     _LOGIN_POST_URL 
= 'https://passport.twitch.tv/login' 
  41     _CLIENT_ID 
= 'kimne78kx3ncx6brgo4mv6wki5h1ko' 
  42     _NETRC_MACHINE 
= 'twitch' 
  44     def _handle_error(self
, response
): 
  45         if not isinstance(response
, dict): 
  47         error 
= response
.get('error') 
  50                 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')), 
  53     def _call_api(self
, path
, item_id
, *args
, **kwargs
): 
  54         headers 
= kwargs
.get('headers', {}).copy() 
  55         headers
['Client-ID'] = self
._CLIENT
_ID
 
  56         kwargs
['headers'] = headers
 
  57         response 
= self
._download
_json
( 
  58             '%s/%s' % (self
._API
_BASE
, path
), item_id
, 
  59             *args
, **compat_kwargs(kwargs
)) 
  60         self
._handle
_error
(response
) 
  63     def _real_initialize(self
): 
  67         username
, password 
= self
._get
_login
_info
() 
  73                 'Unable to login. Twitch said: %s' % message
, expected
=True) 
  75         def login_step(page
, urlh
, note
, data
): 
  76             form 
= self
._hidden
_inputs
(page
) 
  79             page_url 
= urlh
.geturl() 
  80             post_url 
= self
._search
_regex
( 
  81                 r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', page, 
  82                 'post url
', default=self._LOGIN_POST_URL, group='url
') 
  83             post_url = urljoin(page_url, post_url) 
  88                 'Content
-Type
': 'text
/plain
;charset
=UTF
-8', 
  91             response = self._download_json( 
  92                 post_url, None, note, data=json.dumps(form).encode(), 
  93                 headers=headers, expected_status=400) 
  94             error = response.get('error_description
') or response.get('error_code
') 
  98             if 'Authenticated successfully
' in response.get('message
', ''): 
 101             redirect_url = urljoin( 
 103                 response.get('redirect
') or response['redirect_path
']) 
 104             return self._download_webpage_handle( 
 105                 redirect_url, None, 'Downloading login redirect page
', 
 108         login_page, handle = self._download_webpage_handle( 
 109             self._LOGIN_FORM_URL, None, 'Downloading login page
') 
 111         # Some TOR nodes and public proxies are blocked completely 
 112         if 'blacklist_message
' in login_page: 
 113             fail(clean_html(login_page)) 
 115         redirect_page, handle = login_step( 
 116             login_page, handle, 'Logging 
in', { 
 117                 'username
': username, 
 118                 'password
': password, 
 119                 'client_id
': self._CLIENT_ID, 
 123         if not redirect_page: 
 126         if re.search(r'(?i
)<form
[^
>]+id="two-factor-submit"', redirect_page) is not None: 
 127             # TODO: Add mechanism to request an SMS or phone call 
 128             tfa_token = self._get_tfa_info('two
-factor authentication token
') 
 129             login_step(redirect_page, handle, 'Submitting TFA token
', { 
 130                 'authy_token
': tfa_token, 
 131                 'remember_2fa
': 'true
', 
 134     def _prefer_source(self, formats): 
 136             source = next(f for f in formats if f['format_id
'] == 'Source
') 
 137             source['quality
'] = 10 
 138         except StopIteration: 
 140                 if '/chunked
/' in f['url
']: 
 143                         'format_note
': 'Source
', 
 145         self._sort_formats(formats) 
 148 class TwitchItemBaseIE(TwitchBaseIE): 
 149     def _download_info(self, item, item_id): 
 150         return self._extract_info(self._call_api( 
 151             'kraken
/videos
/%s%s' % (item, item_id), item_id, 
 152             'Downloading 
%s info JSON
' % self._ITEM_TYPE)) 
 154     def _extract_media(self, item_id): 
 155         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 156         response = self._call_api( 
 157             'api
/videos
/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id, 
 158             'Downloading 
%s playlist JSON
' % self._ITEM_TYPE) 
 160         chunks = response['chunks
'] 
 161         qualities = list(chunks.keys()) 
 162         for num, fragment in enumerate(zip(*chunks.values()), start=1): 
 164             for fmt_num, fragment_fmt in enumerate(fragment): 
 165                 format_id = qualities[fmt_num] 
 167                     'url
': fragment_fmt['url
'], 
 168                     'format_id
': format_id, 
 169                     'quality
': 1 if format_id == 'live
' else 0, 
 171                 m = re.search(r'^
(?P
<height
>\d
+)[Pp
]', format_id) 
 173                     fmt['height
'] = int(m.group('height
')) 
 175             self._sort_formats(formats) 
 177             entry['id'] = '%s_%d' % (entry['id'], num) 
 178             entry['title
'] = '%s part 
%d' % (entry['title
'], num) 
 179             entry['formats
'] = formats 
 180             entries.append(entry) 
 181         return self.playlist_result(entries, info['id'], info['title
']) 
 183     def _extract_info(self, info): 
 184         status = info.get('status
') 
 185         if status == 'recording
': 
 187         elif status == 'recorded
': 
 193             'title
': info.get('title
') or 'Untitled Broadcast
', 
 194             'description
': info.get('description
'), 
 195             'duration
': int_or_none(info.get('length
')), 
 196             'thumbnail
': info.get('preview
'), 
 197             'uploader
': info.get('channel
', {}).get('display_name
'), 
 198             'uploader_id
': info.get('channel
', {}).get('name
'), 
 199             'timestamp
': parse_iso8601(info.get('recorded_at
')), 
 200             'view_count
': int_or_none(info.get('views
')), 
 204     def _real_extract(self, url): 
 205         return self._extract_media(self._match_id(url)) 
 208 class TwitchVideoIE(TwitchItemBaseIE): 
 209     IE_NAME = 'twitch
:video
' 
 210     _VALID_URL = r'%s/[^
/]+/b
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 215         'url
': 'http
://www
.twitch
.tv
/riotgames
/b
/577357806', 
 218             'title
': 'Worlds Semifinals 
- Star Horn Royal Club vs
. OMG
', 
 220         'playlist_mincount
': 12, 
 221         'skip
': 'HTTP Error 
404: Not Found
', 
 225 class TwitchChapterIE(TwitchItemBaseIE): 
 226     IE_NAME = 'twitch
:chapter
' 
 227     _VALID_URL = r'%s/[^
/]+/c
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 228     _ITEM_TYPE = 'chapter
' 
 232         'url
': 'http
://www
.twitch
.tv
/acracingleague
/c
/5285812', 
 235             'title
': 'ACRL Off Season 
- Sports Cars 
@ Nordschleife
', 
 237         'playlist_mincount
': 3, 
 238         'skip
': 'HTTP Error 
404: Not Found
', 
 240         'url
': 'http
://www
.twitch
.tv
/tsm_theoddone
/c
/2349361', 
 241         'only_matching
': True, 
 245 class TwitchVodIE(TwitchItemBaseIE): 
 246     IE_NAME = 'twitch
:vod
' 
 247     _VALID_URL = r'''(?x) 
 250                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/| 
 251                             player\.twitch\.tv/\?.*?\bvideo=v 
 259         'url
': 'http
://www
.twitch
.tv
/riotgames
/v
/6528877?t
=5m10s
', 
 263             'title
': 'LCK Summer Split 
- Week 
6 Day 
1', 
 264             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 266             'timestamp
': 1435131709, 
 267             'upload_date
': '20150624', 
 268             'uploader
': 'Riot Games
', 
 269             'uploader_id
': 'riotgames
', 
 275             'skip_download
': True, 
 278         # Untitled broadcast (title is None) 
 279         'url
': 'http
://www
.twitch
.tv
/belkao_o
/v
/11230755', 
 283             'title
': 'Untitled Broadcast
', 
 284             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 286             'timestamp
': 1439746708, 
 287             'upload_date
': '20150816', 
 288             'uploader
': 'BelkAO_o
', 
 289             'uploader_id
': 'belkao_o
', 
 294             'skip_download
': True, 
 296         'skip
': 'HTTP Error 
404: Not Found
', 
 298         'url
': 'http
://player
.twitch
.tv
/?t
=5m10s
&video
=v6528877
', 
 299         'only_matching
': True, 
 301         'url
': 'https
://www
.twitch
.tv
/videos
/6528877', 
 302         'only_matching
': True, 
 304         'url
': 'https
://m
.twitch
.tv
/beagsandjam
/v
/247478721', 
 305         'only_matching
': True, 
 307         'url
': 'https
://www
.twitch
.tv
/northernlion
/video
/291940395', 
 308         'only_matching
': True, 
 311     def _real_extract(self, url): 
 312         item_id = self._match_id(url) 
 314         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 315         access_token = self._call_api( 
 316             'api
/vods
/%s/access_token
' % item_id, item_id, 
 317             'Downloading 
%s access token
' % self._ITEM_TYPE) 
 319         formats = self._extract_m3u8_formats( 
 321                 self._USHER_BASE, item_id, 
 322                 compat_urllib_parse_urlencode({ 
 323                     'allow_source
': 'true
', 
 324                     'allow_audio_only
': 'true
', 
 325                     'allow_spectre
': 'true
', 
 326                     'player
': 'twitchweb
', 
 327                     'nauth
': access_token['token
'], 
 328                     'nauthsig
': access_token['sig
'], 
 330             item_id, 'mp4
', entry_protocol='m3u8_native
') 
 332         self._prefer_source(formats) 
 333         info['formats
'] = formats 
 335         parsed_url = compat_urllib_parse_urlparse(url) 
 336         query = compat_parse_qs(parsed_url.query) 
 338             info['start_time
'] = parse_duration(query['t
'][0]) 
 340         if info.get('timestamp
') is not None: 
 341             info['subtitles
'] = { 
 343                     'url
': update_url_query( 
 344                         'https
://rechat
.twitch
.tv
/rechat
-messages
', { 
 345                             'video_id
': 'v
%s' % item_id, 
 346                             'start
': info['timestamp
'], 
 355 class TwitchPlaylistBaseIE(TwitchBaseIE): 
 356     _PLAYLIST_PATH = 'kraken
/channels
/%s/videos
/?offset
=%d&limit
=%d' 
 359     def _extract_playlist(self, channel_id): 
 360         info = self._call_api( 
 361             'kraken
/channels
/%s' % channel_id, 
 362             channel_id, 'Downloading channel info JSON
') 
 363         channel_name = info.get('display_name
') or info.get('name
') 
 366         limit = self._PAGE_LIMIT 
 367         broken_paging_detected = False 
 368         counter_override = None 
 369         for counter in itertools.count(1): 
 370             response = self._call_api( 
 371                 self._PLAYLIST_PATH % (channel_id, offset, limit), 
 373                 'Downloading 
%s JSON page 
%s' 
 374                 % (self._PLAYLIST_TYPE, counter_override or counter)) 
 375             page_entries = self._extract_playlist_page(response) 
 378             total = int_or_none(response.get('_total
')) 
 379             # Since the beginning of March 2016 twitch's paging mechanism
 
 380             # is completely broken on the twitch side. It simply ignores 
 381             # a limit and returns the whole offset number of videos. 
 382             # Working around by just requesting all videos at once. 
 383             # Upd: pagination bug was fixed by twitch on 15.03.2016. 
 384             if not broken_paging_detected 
and total 
and len(page_entries
) > limit
: 
 386                     'Twitch pagination is broken on twitch side, requesting all videos at once', 
 388                 broken_paging_detected 
= True 
 390                 counter_override 
= '(all at once)' 
 392             entries
.extend(page_entries
) 
 393             if broken_paging_detected 
or total 
and len(page_entries
) >= total
: 
 396         return self
.playlist_result( 
 397             [self
._make
_url
_result
(entry
) for entry 
in orderedSet(entries
)], 
 398             channel_id
, channel_name
) 
 400     def _make_url_result(self
, url
): 
 402             video_id 
= 'v%s' % TwitchVodIE
._match
_id
(url
) 
 403             return self
.url_result(url
, TwitchVodIE
.ie_key(), video_id
=video_id
) 
 404         except AssertionError: 
 405             return self
.url_result(url
) 
 407     def _extract_playlist_page(self
, response
): 
 408         videos 
= response
.get('videos') 
 409         return [video
['url'] for video 
in videos
] if videos 
else [] 
 411     def _real_extract(self
, url
): 
 412         return self
._extract
_playlist
(self
._match
_id
(url
)) 
 415 class TwitchProfileIE(TwitchPlaylistBaseIE
): 
 416     IE_NAME 
= 'twitch:profile' 
 417     _VALID_URL 
= r
'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
 
 418     _PLAYLIST_TYPE 
= 'profile' 
 421         'url': 'http://www.twitch.tv/vanillatv/profile', 
 424             'title': 'VanillaTV', 
 426         'playlist_mincount': 412, 
 428         'url': 'http://m.twitch.tv/vanillatv/profile', 
 429         'only_matching': True, 
 433 class TwitchVideosBaseIE(TwitchPlaylistBaseIE
): 
 434     _VALID_URL_VIDEOS_BASE 
= r
'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE
._VALID
_URL
_BASE
 
 435     _PLAYLIST_PATH 
= TwitchPlaylistBaseIE
._PLAYLIST
_PATH 
+ '&broadcast_type=' 
 438 class TwitchAllVideosIE(TwitchVideosBaseIE
): 
 439     IE_NAME 
= 'twitch:videos:all' 
 440     _VALID_URL 
= r
'%s/all' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 441     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive,upload,highlight' 
 442     _PLAYLIST_TYPE 
= 'all videos' 
 445         'url': 'https://www.twitch.tv/spamfish/videos/all', 
 450         'playlist_mincount': 869, 
 452         'url': 'https://m.twitch.tv/spamfish/videos/all', 
 453         'only_matching': True, 
 457 class TwitchUploadsIE(TwitchVideosBaseIE
): 
 458     IE_NAME 
= 'twitch:videos:uploads' 
 459     _VALID_URL 
= r
'%s/uploads' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 460     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'upload' 
 461     _PLAYLIST_TYPE 
= 'uploads' 
 464         'url': 'https://www.twitch.tv/spamfish/videos/uploads', 
 469         'playlist_mincount': 0, 
 471         'url': 'https://m.twitch.tv/spamfish/videos/uploads', 
 472         'only_matching': True, 
 476 class TwitchPastBroadcastsIE(TwitchVideosBaseIE
): 
 477     IE_NAME 
= 'twitch:videos:past-broadcasts' 
 478     _VALID_URL 
= r
'%s/past-broadcasts' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 479     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive' 
 480     _PLAYLIST_TYPE 
= 'past broadcasts' 
 483         'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', 
 488         'playlist_mincount': 0, 
 490         'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts', 
 491         'only_matching': True, 
 495 class TwitchHighlightsIE(TwitchVideosBaseIE
): 
 496     IE_NAME 
= 'twitch:videos:highlights' 
 497     _VALID_URL 
= r
'%s/highlights' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 498     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'highlight' 
 499     _PLAYLIST_TYPE 
= 'highlights' 
 502         'url': 'https://www.twitch.tv/spamfish/videos/highlights', 
 507         'playlist_mincount': 805, 
 509         'url': 'https://m.twitch.tv/spamfish/videos/highlights', 
 510         'only_matching': True, 
 514 class TwitchStreamIE(TwitchBaseIE
): 
 515     IE_NAME 
= 'twitch:stream' 
 516     _VALID_URL 
= r
'''(?x) 
 519                             (?:(?:www|go|m)\.)?twitch\.tv/| 
 520                             player\.twitch\.tv/\?.*?\bchannel= 
 526         'url': 'http://www.twitch.tv/shroomztv', 
 529             'display_id': 'shroomztv', 
 531             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
 532             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV', 
 534             'timestamp': 1421928037, 
 535             'upload_date': '20150122', 
 536             'uploader': 'ShroomzTV', 
 537             'uploader_id': 'shroomztv', 
 542             'skip_download': True, 
 545         'url': 'http://www.twitch.tv/miracle_doto#profile-0', 
 546         'only_matching': True, 
 548         'url': 'https://player.twitch.tv/?channel=lotsofs', 
 549         'only_matching': True, 
 551         'url': 'https://go.twitch.tv/food', 
 552         'only_matching': True, 
 554         'url': 'https://m.twitch.tv/food', 
 555         'only_matching': True, 
 559     def suitable(cls
, url
): 
 561                 if any(ie
.suitable(url
) for ie 
in ( 
 568                     TwitchPastBroadcastsIE
, 
 571                 else super(TwitchStreamIE
, cls
).suitable(url
)) 
 573     def _real_extract(self
, url
): 
 574         channel_id 
= self
._match
_id
(url
) 
 576         stream 
= self
._call
_api
( 
 577             'kraken/streams/%s?stream_type=all' % channel_id
, channel_id
, 
 578             'Downloading stream JSON').get('stream') 
 581             raise ExtractorError('%s is offline' % channel_id
, expected
=True) 
 583         # Channel name may be typed in a different case than the original channel name 
 584         # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON), which would lead to constructing 
 585         # an invalid m3u8 URL. Work around this by using the original channel name from the 
 586         # stream JSON, falling back to lowercase if it is not available. 
 587         channel_id 
= stream
.get('channel', {}).get('name') or channel_id
.lower() 
 589         access_token 
= self
._call
_api
( 
 590             'api/channels/%s/access_token' % channel_id
, channel_id
, 
 591             'Downloading channel access token') 
 594             'allow_source': 'true', 
 595             'allow_audio_only': 'true', 
 596             'allow_spectre': 'true', 
 597             'p': random
.randint(1000000, 10000000), 
 598             'player': 'twitchweb', 
 599             'segment_preference': '4', 
 600             'sig': access_token
['sig'].encode('utf-8'), 
 601             'token': access_token
['token'].encode('utf-8'), 
 603         formats 
= self
._extract
_m
3u8_formats
( 
 604             '%s/api/channel/hls/%s.m3u8?%s' 
 605             % (self
._USHER
_BASE
, channel_id
, compat_urllib_parse_urlencode(query
)), 
 607         self
._prefer
_source
(formats
) 
 609         view_count 
= stream
.get('viewers') 
 610         timestamp 
= parse_iso8601(stream
.get('created_at')) 
 612         channel 
= stream
['channel'] 
 613         title 
= self
._live
_title
(channel
.get('display_name') or channel
.get('name')) 
 614         description 
= channel
.get('status') 
 617         for thumbnail_key
, thumbnail_url 
in stream
['preview'].items(): 
 618             m 
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key
) 
 622                 'url': thumbnail_url
, 
 623                 'width': int(m
.group('width')), 
 624                 'height': int(m
.group('height')), 
 628             'id': compat_str(stream
['_id']), 
 629             'display_id': channel_id
, 
 631             'description': description
, 
 632             'thumbnails': thumbnails
, 
 633             'uploader': channel
.get('display_name'), 
 634             'uploader_id': channel
.get('name'), 
 635             'timestamp': timestamp
, 
 636             'view_count': view_count
, 
 642 class TwitchClipsIE(TwitchBaseIE
): 
 643     IE_NAME 
= 'twitch:clips' 
 644     _VALID_URL 
= r
'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)' 
 647         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', 
 648         'md5': '761769e1eafce0ffebfb4089cb3847cd', 
 652             'title': 'EA Play 2016 Live from the Novo Theatre', 
 653             'thumbnail': r
're:^https?://.*\.jpg', 
 654             'timestamp': 1465767393, 
 655             'upload_date': '20160612', 
 657             'uploader': 'stereotype_', 
 658             'uploader_id': '43566419', 
 662         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 
 663         'only_matching': True, 
 665         'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan', 
 666         'only_matching': True, 
 669     def _real_extract(self
, url
): 
 670         video_id 
= self
._match
_id
(url
) 
 672         status 
= self
._download
_json
( 
 673             'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id
, 
 678         for option 
in status
['quality_options']: 
 679             if not isinstance(option
, dict): 
 681             source 
= url_or_none(option
.get('source')) 
 686                 'format_id': option
.get('quality'), 
 687                 'height': int_or_none(option
.get('quality')), 
 688                 'fps': int_or_none(option
.get('frame_rate')), 
 691         self
._sort
_formats
(formats
) 
 697         clip 
= self
._call
_api
( 
 698             'kraken/clips/%s' % video_id
, video_id
, fatal
=False, headers
={ 
 699                 'Accept': 'application/vnd.twitchtv.v5+json', 
 703             quality_key 
= qualities(('tiny', 'small', 'medium')) 
 705             thumbnails_dict 
= clip
.get('thumbnails') 
 706             if isinstance(thumbnails_dict
, dict): 
 707                 for thumbnail_id
, thumbnail_url 
in thumbnails_dict
.items(): 
 710                         'url': thumbnail_url
, 
 711                         'preference': quality_key(thumbnail_id
), 
 715                 'id': clip
.get('tracking_id') or video_id
, 
 716                 'title': clip
.get('title') or video_id
, 
 717                 'duration': float_or_none(clip
.get('duration')), 
 718                 'views': int_or_none(clip
.get('views')), 
 719                 'timestamp': unified_timestamp(clip
.get('created_at')), 
 720                 'thumbnails': thumbnails
, 
 721                 'creator': try_get(clip
, lambda x
: x
['broadcaster']['display_name'], compat_str
), 
 722                 'uploader': try_get(clip
, lambda x
: x
['curator']['display_name'], compat_str
), 
 723                 'uploader_id': try_get(clip
, lambda x
: x
['curator']['id'], compat_str
),