2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  13     compat_urllib_parse_urlencode
, 
  14     compat_urllib_parse_urlparse
, 
  30 class TwitchBaseIE(InfoExtractor
): 
  31     _VALID_URL_BASE 
= r
'https?://(?:(?:www|go)\.)?twitch\.tv' 
  33     _API_BASE 
= 'https://api.twitch.tv' 
  34     _USHER_BASE 
= 'https://usher.ttvnw.net' 
  35     _LOGIN_URL 
= 'https://www.twitch.tv/login' 
  36     _CLIENT_ID 
= 'jzkbprff40iqj646a697cyrvl0zt2m6' 
  37     _NETRC_MACHINE 
= 'twitch' 
  39     def _handle_error(self
, response
): 
  40         if not isinstance(response
, dict): 
  42         error 
= response
.get('error') 
  45                 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')), 
  48     def _call_api(self
, path
, item_id
, note
): 
  49         response 
= self
._download
_json
( 
  50             '%s/%s' % (self
._API
_BASE
, path
), item_id
, note
, 
  51             headers
={'Client-ID': self
._CLIENT
_ID
}) 
  52         self
._handle
_error
(response
) 
  55     def _real_initialize(self
): 
  59         (username
, password
) = self
._get
_login
_info
() 
  65                 'Unable to login. Twitch said: %s' % message
, expected
=True) 
  67         def login_step(page
, urlh
, note
, data
): 
  68             form 
= self
._hidden
_inputs
(page
) 
  71             page_url 
= urlh
.geturl() 
  72             post_url 
= self
._search
_regex
( 
  73                 r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', page, 
  74                 'post url
', default=page_url, group='url
') 
  75             post_url = urljoin(page_url, post_url) 
  77             headers = {'Referer
': page_url} 
  80                 response = self._download_json( 
  82                     data=urlencode_postdata(form), 
  84             except ExtractorError as e: 
  85                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: 
  86                     response = self._parse_json( 
  87                         e.cause.read().decode('utf
-8'), None) 
  88                     fail(response['message
']) 
  91             redirect_url = urljoin(post_url, response['redirect
']) 
  92             return self._download_webpage_handle( 
  93                 redirect_url, None, 'Downloading login redirect page
', 
  96         login_page, handle = self._download_webpage_handle( 
  97             self._LOGIN_URL, None, 'Downloading login page
') 
  99         # Some TOR nodes and public proxies are blocked completely 
 100         if 'blacklist_message
' in login_page: 
 101             fail(clean_html(login_page)) 
 103         redirect_page, handle = login_step( 
 104             login_page, handle, 'Logging 
in as %s' % username, { 
 105                 'username
': username, 
 106                 'password
': password, 
 109         if re.search(r'(?i
)<form
[^
>]+id="two-factor-submit"', redirect_page) is not None: 
 110             # TODO: Add mechanism to request an SMS or phone call 
 111             tfa_token = self._get_tfa_info('two
-factor authentication token
') 
 112             login_step(redirect_page, handle, 'Submitting TFA token
', { 
 113                 'authy_token
': tfa_token, 
 114                 'remember_2fa
': 'true
', 
 117     def _prefer_source(self, formats): 
 119             source = next(f for f in formats if f['format_id
'] == 'Source
') 
 120             source['preference
'] = 10 
 121         except StopIteration: 
 122             pass  # No Source stream present 
 123         self._sort_formats(formats) 
 126 class TwitchItemBaseIE(TwitchBaseIE): 
 127     def _download_info(self, item, item_id): 
 128         return self._extract_info(self._call_api( 
 129             'kraken
/videos
/%s%s' % (item, item_id), item_id, 
 130             'Downloading 
%s info JSON
' % self._ITEM_TYPE)) 
 132     def _extract_media(self, item_id): 
 133         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 134         response = self._call_api( 
 135             'api
/videos
/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id, 
 136             'Downloading 
%s playlist JSON
' % self._ITEM_TYPE) 
 138         chunks = response['chunks
'] 
 139         qualities = list(chunks.keys()) 
 140         for num, fragment in enumerate(zip(*chunks.values()), start=1): 
 142             for fmt_num, fragment_fmt in enumerate(fragment): 
 143                 format_id = qualities[fmt_num] 
 145                     'url
': fragment_fmt['url
'], 
 146                     'format_id
': format_id, 
 147                     'quality
': 1 if format_id == 'live
' else 0, 
 149                 m = re.search(r'^
(?P
<height
>\d
+)[Pp
]', format_id) 
 151                     fmt['height
'] = int(m.group('height
')) 
 153             self._sort_formats(formats) 
 155             entry['id'] = '%s_%d' % (entry['id'], num) 
 156             entry['title
'] = '%s part 
%d' % (entry['title
'], num) 
 157             entry['formats
'] = formats 
 158             entries.append(entry) 
 159         return self.playlist_result(entries, info['id'], info['title
']) 
 161     def _extract_info(self, info): 
 164             'title
': info.get('title
') or 'Untitled Broadcast
', 
 165             'description
': info.get('description
'), 
 166             'duration
': int_or_none(info.get('length
')), 
 167             'thumbnail
': info.get('preview
'), 
 168             'uploader
': info.get('channel
', {}).get('display_name
'), 
 169             'uploader_id
': info.get('channel
', {}).get('name
'), 
 170             'timestamp
': parse_iso8601(info.get('recorded_at
')), 
 171             'view_count
': int_or_none(info.get('views
')), 
 174     def _real_extract(self, url): 
 175         return self._extract_media(self._match_id(url)) 
 178 class TwitchVideoIE(TwitchItemBaseIE): 
 179     IE_NAME = 'twitch
:video
' 
 180     _VALID_URL = r'%s/[^
/]+/b
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 185         'url
': 'http
://www
.twitch
.tv
/riotgames
/b
/577357806', 
 188             'title
': 'Worlds Semifinals 
- Star Horn Royal Club vs
. OMG
', 
 190         'playlist_mincount
': 12, 
 191         'skip
': 'HTTP Error 
404: Not Found
', 
 195 class TwitchChapterIE(TwitchItemBaseIE): 
 196     IE_NAME = 'twitch
:chapter
' 
 197     _VALID_URL = r'%s/[^
/]+/c
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE 
 198     _ITEM_TYPE = 'chapter
' 
 202         'url
': 'http
://www
.twitch
.tv
/acracingleague
/c
/5285812', 
 205             'title
': 'ACRL Off Season 
- Sports Cars 
@ Nordschleife
', 
 207         'playlist_mincount
': 3, 
 208         'skip
': 'HTTP Error 
404: Not Found
', 
 210         'url
': 'http
://www
.twitch
.tv
/tsm_theoddone
/c
/2349361', 
 211         'only_matching
': True, 
 215 class TwitchVodIE(TwitchItemBaseIE): 
 216     IE_NAME = 'twitch
:vod
' 
 217     _VALID_URL = r'''(?x) 
 220                             (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/| 
 221                             player\.twitch\.tv/\?.*?\bvideo=v 
 229         'url
': 'http
://www
.twitch
.tv
/riotgames
/v
/6528877?t
=5m10s
', 
 233             'title
': 'LCK Summer Split 
- Week 
6 Day 
1', 
 234             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 236             'timestamp
': 1435131709, 
 237             'upload_date
': '20150624', 
 238             'uploader
': 'Riot Games
', 
 239             'uploader_id
': 'riotgames
', 
 245             'skip_download
': True, 
 248         # Untitled broadcast (title is None) 
 249         'url
': 'http
://www
.twitch
.tv
/belkao_o
/v
/11230755', 
 253             'title
': 'Untitled Broadcast
', 
 254             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 256             'timestamp
': 1439746708, 
 257             'upload_date
': '20150816', 
 258             'uploader
': 'BelkAO_o
', 
 259             'uploader_id
': 'belkao_o
', 
 264             'skip_download
': True, 
 266         'skip
': 'HTTP Error 
404: Not Found
', 
 268         'url
': 'http
://player
.twitch
.tv
/?t
=5m10s
&video
=v6528877
', 
 269         'only_matching
': True, 
 271         'url
': 'https
://www
.twitch
.tv
/videos
/6528877', 
 272         'only_matching
': True, 
 275     def _real_extract(self, url): 
 276         item_id = self._match_id(url) 
 278         info = self._download_info(self._ITEM_SHORTCUT, item_id) 
 279         access_token = self._call_api( 
 280             'api
/vods
/%s/access_token
' % item_id, item_id, 
 281             'Downloading 
%s access token
' % self._ITEM_TYPE) 
 283         formats = self._extract_m3u8_formats( 
 285                 self._USHER_BASE, item_id, 
 286                 compat_urllib_parse_urlencode({ 
 287                     'allow_source
': 'true
', 
 288                     'allow_audio_only
': 'true
', 
 289                     'allow_spectre
': 'true
', 
 290                     'player
': 'twitchweb
', 
 291                     'nauth
': access_token['token
'], 
 292                     'nauthsig
': access_token['sig
'], 
 294             item_id, 'mp4
', entry_protocol='m3u8_native
') 
 296         self._prefer_source(formats) 
 297         info['formats
'] = formats 
 299         parsed_url = compat_urllib_parse_urlparse(url) 
 300         query = compat_parse_qs(parsed_url.query) 
 302             info['start_time
'] = parse_duration(query['t
'][0]) 
 304         if info.get('timestamp
') is not None: 
 305             info['subtitles
'] = { 
 307                     'url
': update_url_query( 
 308                         'https
://rechat
.twitch
.tv
/rechat
-messages
', { 
 309                             'video_id
': 'v
%s' % item_id, 
 310                             'start
': info['timestamp
'], 
 319 class TwitchPlaylistBaseIE(TwitchBaseIE): 
 320     _PLAYLIST_PATH = 'kraken
/channels
/%s/videos
/?offset
=%d&limit
=%d' 
 323     def _extract_playlist(self, channel_id): 
 324         info = self._call_api( 
 325             'kraken
/channels
/%s' % channel_id, 
 326             channel_id, 'Downloading channel info JSON
') 
 327         channel_name = info.get('display_name
') or info.get('name
') 
 330         limit = self._PAGE_LIMIT 
 331         broken_paging_detected = False 
 332         counter_override = None 
 333         for counter in itertools.count(1): 
 334             response = self._call_api( 
 335                 self._PLAYLIST_PATH % (channel_id, offset, limit), 
 337                 'Downloading 
%s JSON page 
%s' 
 338                 % (self._PLAYLIST_TYPE, counter_override or counter)) 
 339             page_entries = self._extract_playlist_page(response) 
 342             total = int_or_none(response.get('_total
')) 
 343             # Since the beginning of March 2016 twitch's paging mechanism
 
 344             # is completely broken on the twitch side. It simply ignores 
 345             # a limit and returns the whole offset number of videos. 
 346             # Working around by just requesting all videos at once. 
 347             # Upd: pagination bug was fixed by twitch on 15.03.2016. 
 348             if not broken_paging_detected 
and total 
and len(page_entries
) > limit
: 
 350                     'Twitch pagination is broken on twitch side, requesting all videos at once', 
 352                 broken_paging_detected 
= True 
 354                 counter_override 
= '(all at once)' 
 356             entries
.extend(page_entries
) 
 357             if broken_paging_detected 
or total 
and len(page_entries
) >= total
: 
 360         return self
.playlist_result( 
 361             [self
.url_result(entry
) for entry 
in orderedSet(entries
)], 
 362             channel_id
, channel_name
) 
 364     def _extract_playlist_page(self
, response
): 
 365         videos 
= response
.get('videos') 
 366         return [video
['url'] for video 
in videos
] if videos 
else [] 
 368     def _real_extract(self
, url
): 
 369         return self
._extract
_playlist
(self
._match
_id
(url
)) 
 372 class TwitchProfileIE(TwitchPlaylistBaseIE
): 
 373     IE_NAME 
= 'twitch:profile' 
 374     _VALID_URL 
= r
'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
 
 375     _PLAYLIST_TYPE 
= 'profile' 
 378         'url': 'http://www.twitch.tv/vanillatv/profile', 
 381             'title': 'VanillaTV', 
 383         'playlist_mincount': 412, 
 class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
     # Playlist API path inherited from the playlist base, with an open
     # ``broadcast_type=`` query parameter appended; subclasses concatenate
     # the broadcast type value(s) onto it.
     _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
     # URL prefix ``<twitch base>/<channel id>/videos``; subclasses append
     # their own trailing path segment to build ``_VALID_URL``.
     _VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
 392 class TwitchAllVideosIE(TwitchVideosBaseIE
): 
 393     IE_NAME 
= 'twitch:videos:all' 
 394     _VALID_URL 
= r
'%s/all' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 395     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive,upload,highlight' 
 396     _PLAYLIST_TYPE 
= 'all videos' 
 399         'url': 'https://www.twitch.tv/spamfish/videos/all', 
 404         'playlist_mincount': 869, 
 408 class TwitchUploadsIE(TwitchVideosBaseIE
): 
 409     IE_NAME 
= 'twitch:videos:uploads' 
 410     _VALID_URL 
= r
'%s/uploads' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 411     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'upload' 
 412     _PLAYLIST_TYPE 
= 'uploads' 
 415         'url': 'https://www.twitch.tv/spamfish/videos/uploads', 
 420         'playlist_mincount': 0, 
 424 class TwitchPastBroadcastsIE(TwitchVideosBaseIE
): 
 425     IE_NAME 
= 'twitch:videos:past-broadcasts' 
 426     _VALID_URL 
= r
'%s/past-broadcasts' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 427     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'archive' 
 428     _PLAYLIST_TYPE 
= 'past broadcasts' 
 431         'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', 
 436         'playlist_mincount': 0, 
 440 class TwitchHighlightsIE(TwitchVideosBaseIE
): 
 441     IE_NAME 
= 'twitch:videos:highlights' 
 442     _VALID_URL 
= r
'%s/highlights' % TwitchVideosBaseIE
._VALID
_URL
_VIDEOS
_BASE
 
 443     _PLAYLIST_PATH 
= TwitchVideosBaseIE
._PLAYLIST
_PATH 
+ 'highlight' 
 444     _PLAYLIST_TYPE 
= 'highlights' 
 447         'url': 'https://www.twitch.tv/spamfish/videos/highlights', 
 452         'playlist_mincount': 805, 
 456 class TwitchStreamIE(TwitchBaseIE
): 
 457     IE_NAME 
= 'twitch:stream' 
 458     _VALID_URL 
= r
'''(?x) 
 461                             (?:(?:www|go)\.)?twitch\.tv/| 
 462                             player\.twitch\.tv/\?.*?\bchannel= 
 468         'url': 'http://www.twitch.tv/shroomztv', 
 471             'display_id': 'shroomztv', 
 473             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
 474             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV', 
 476             'timestamp': 1421928037, 
 477             'upload_date': '20150122', 
 478             'uploader': 'ShroomzTV', 
 479             'uploader_id': 'shroomztv', 
 484             'skip_download': True, 
 487         'url': 'http://www.twitch.tv/miracle_doto#profile-0', 
 488         'only_matching': True, 
 490         'url': 'https://player.twitch.tv/?channel=lotsofs', 
 491         'only_matching': True, 
 493         'url': 'https://go.twitch.tv/food', 
 494         'only_matching': True, 
 498     def suitable(cls
, url
): 
 500                 if any(ie
.suitable(url
) for ie 
in ( 
 507                     TwitchPastBroadcastsIE
, 
 509                 else super(TwitchStreamIE
, cls
).suitable(url
)) 
 511     def _real_extract(self
, url
): 
 512         channel_id 
= self
._match
_id
(url
) 
 514         stream 
= self
._call
_api
( 
 515             'kraken/streams/%s?stream_type=all' % channel_id
, channel_id
, 
 516             'Downloading stream JSON').get('stream') 
 519             raise ExtractorError('%s is offline' % channel_id
, expected
=True) 
 521         # Channel name may be typed in different case than the original channel name 
 522         # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing 
 523         # an invalid m3u8 URL. Working around by use of original channel name from stream 
 524         # JSON and fallback to lowercase if it's not available. 
 525         channel_id 
= stream
.get('channel', {}).get('name') or channel_id
.lower() 
 527         access_token 
= self
._call
_api
( 
 528             'api/channels/%s/access_token' % channel_id
, channel_id
, 
 529             'Downloading channel access token') 
 532             'allow_source': 'true', 
 533             'allow_audio_only': 'true', 
 534             'allow_spectre': 'true', 
 535             'p': random
.randint(1000000, 10000000), 
 536             'player': 'twitchweb', 
 537             'segment_preference': '4', 
 538             'sig': access_token
['sig'].encode('utf-8'), 
 539             'token': access_token
['token'].encode('utf-8'), 
 541         formats 
= self
._extract
_m
3u8_formats
( 
 542             '%s/api/channel/hls/%s.m3u8?%s' 
 543             % (self
._USHER
_BASE
, channel_id
, compat_urllib_parse_urlencode(query
)), 
 545         self
._prefer
_source
(formats
) 
 547         view_count 
= stream
.get('viewers') 
 548         timestamp 
= parse_iso8601(stream
.get('created_at')) 
 550         channel 
= stream
['channel'] 
 551         title 
= self
._live
_title
(channel
.get('display_name') or channel
.get('name')) 
 552         description 
= channel
.get('status') 
 555         for thumbnail_key
, thumbnail_url 
in stream
['preview'].items(): 
 556             m 
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key
) 
 560                 'url': thumbnail_url
, 
 561                 'width': int(m
.group('width')), 
 562                 'height': int(m
.group('height')), 
 566             'id': compat_str(stream
['_id']), 
 567             'display_id': channel_id
, 
 569             'description': description
, 
 570             'thumbnails': thumbnails
, 
 571             'uploader': channel
.get('display_name'), 
 572             'uploader_id': channel
.get('name'), 
 573             'timestamp': timestamp
, 
 574             'view_count': view_count
, 
 580 class TwitchClipsIE(InfoExtractor
): 
 581     IE_NAME 
= 'twitch:clips' 
 582     _VALID_URL 
= r
'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' 
 585         'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound', 
 586         'md5': '761769e1eafce0ffebfb4089cb3847cd', 
 588             'id': 'AggressiveCobraPoooound', 
 590             'title': 'EA Play 2016 Live from the Novo Theatre', 
 591             'thumbnail': r
're:^https?://.*\.jpg', 
 593             'uploader': 'stereotype_', 
 594             'uploader_id': 'stereotype_', 
 598         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 
 599         'only_matching': True, 
 602     def _real_extract(self
, url
): 
 603         video_id 
= self
._match
_id
(url
) 
 605         webpage 
= self
._download
_webpage
(url
, video_id
) 
 607         clip 
= self
._parse
_json
( 
 609                 r
'(?s)clipInfo\s*=\s*({.+?});', webpage
, 'clip info'), 
 610             video_id
, transform_source
=js_to_json
) 
 612         title 
= clip
.get('channel_title') or self
._og
_search
_title
(webpage
) 
 615             'url': option
['source'], 
 616             'format_id': option
.get('quality'), 
 617             'height': int_or_none(option
.get('quality')), 
 618         } for option 
in clip
.get('quality_options', []) if option
.get('source')] 
 622                 'url': clip
['clip_video_url'], 
 625         self
._sort
_formats
(formats
) 
 630             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
 631             'creator': clip
.get('broadcaster_display_name') or clip
.get('broadcaster_login'), 
 632             'uploader': clip
.get('curator_login'), 
 633             'uploader_id': clip
.get('curator_display_name'),