2 from __future__
import unicode_literals
8 from .common
import InfoExtractor
13 compat_urllib_parse_urlencode
,
14 compat_urllib_parse_urlparse
,
29 class TwitchBaseIE(InfoExtractor
):
30 _VALID_URL_BASE
= r
'https?://(?:www\.)?twitch\.tv'
32 _API_BASE
= 'https://api.twitch.tv'
33 _USHER_BASE
= 'https://usher.ttvnw.net'
34 _LOGIN_URL
= 'http://www.twitch.tv/login'
35 _CLIENT_ID
= 'jzkbprff40iqj646a697cyrvl0zt2m6'
36 _NETRC_MACHINE
= 'twitch'
38 def _handle_error(self
, response
):
39 if not isinstance(response
, dict):
41 error
= response
.get('error')
44 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')),
47 def _call_api(self
, path
, item_id
, note
):
48 response
= self
._download
_json
(
49 '%s/%s' % (self
._API
_BASE
, path
), item_id
, note
,
50 headers
={'Client-ID': self
._CLIENT
_ID
})
51 self
._handle
_error
(response
)
54 def _real_initialize(self
):
58 (username
, password
) = self
._get
_login
_info
()
64 'Unable to login. Twitch said: %s' % message
, expected
=True)
66 login_page
, handle
= self
._download
_webpage
_handle
(
67 self
._LOGIN
_URL
, None, 'Downloading login page')
69 # Some TOR nodes and public proxies are blocked completely
70 if 'blacklist_message' in login_page
:
71 fail(clean_html(login_page
))
73 login_form
= self
._hidden
_inputs
(login_page
)
80 redirect_url
= handle
.geturl()
82 post_url
= self
._search
_regex
(
83 r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', login_page,
84 'post url
', default=redirect_url, group='url
')
86 if not post_url.startswith('http
'):
87 post_url = compat_urlparse.urljoin(redirect_url, post_url)
89 headers = {'Referer
': redirect_url}
92 response = self._download_json(
93 post_url, None, 'Logging
in as %s' % username,
94 data=urlencode_postdata(login_form),
96 except ExtractorError as e:
97 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
98 response = self._parse_json(
99 e.cause.read().decode('utf
-8'), None)
100 fail(response['message
'])
103 if response.get('redirect
'):
104 self._download_webpage(
105 response['redirect
'], None, 'Downloading login redirect page
',
108 def _prefer_source(self, formats):
110 source = next(f for f in formats if f['format_id
'] == 'Source
')
111 source['preference
'] = 10
112 except StopIteration:
113 pass # No Source stream present
114 self._sort_formats(formats)
117 class TwitchItemBaseIE(TwitchBaseIE):
118 def _download_info(self, item, item_id):
119 return self._extract_info(self._call_api(
120 'kraken
/videos
/%s%s' % (item, item_id), item_id,
121 'Downloading
%s info JSON
' % self._ITEM_TYPE))
123 def _extract_media(self, item_id):
124 info = self._download_info(self._ITEM_SHORTCUT, item_id)
125 response = self._call_api(
126 'api
/videos
/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id,
127 'Downloading
%s playlist JSON
' % self._ITEM_TYPE)
129 chunks = response['chunks
']
130 qualities = list(chunks.keys())
131 for num, fragment in enumerate(zip(*chunks.values()), start=1):
133 for fmt_num, fragment_fmt in enumerate(fragment):
134 format_id = qualities[fmt_num]
136 'url
': fragment_fmt['url
'],
137 'format_id
': format_id,
138 'quality
': 1 if format_id == 'live
' else 0,
140 m = re.search(r'^
(?P
<height
>\d
+)[Pp
]', format_id)
142 fmt['height
'] = int(m.group('height
'))
144 self._sort_formats(formats)
146 entry['id'] = '%s_%d' % (entry['id'], num)
147 entry['title
'] = '%s part
%d' % (entry['title
'], num)
148 entry['formats
'] = formats
149 entries.append(entry)
150 return self.playlist_result(entries, info['id'], info['title
'])
152 def _extract_info(self, info):
155 'title
': info.get('title
') or 'Untitled Broadcast
',
156 'description
': info.get('description
'),
157 'duration
': int_or_none(info.get('length
')),
158 'thumbnail
': info.get('preview
'),
159 'uploader
': info.get('channel
', {}).get('display_name
'),
160 'uploader_id
': info.get('channel
', {}).get('name
'),
161 'timestamp
': parse_iso8601(info.get('recorded_at
')),
162 'view_count
': int_or_none(info.get('views
')),
165 def _real_extract(self, url):
166 return self._extract_media(self._match_id(url))
169 class TwitchVideoIE(TwitchItemBaseIE):
170 IE_NAME = 'twitch
:video
'
171 _VALID_URL = r'%s/[^
/]+/b
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE
176 'url
': 'http
://www
.twitch
.tv
/riotgames
/b
/577357806',
179 'title
': 'Worlds Semifinals
- Star Horn Royal Club vs
. OMG
',
181 'playlist_mincount
': 12,
182 'skip
': 'HTTP Error
404: Not Found
',
186 class TwitchChapterIE(TwitchItemBaseIE):
187 IE_NAME = 'twitch
:chapter
'
188 _VALID_URL = r'%s/[^
/]+/c
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE
189 _ITEM_TYPE = 'chapter
'
193 'url
': 'http
://www
.twitch
.tv
/acracingleague
/c
/5285812',
196 'title
': 'ACRL Off Season
- Sports Cars
@ Nordschleife
',
198 'playlist_mincount
': 3,
199 'skip
': 'HTTP Error
404: Not Found
',
201 'url
': 'http
://www
.twitch
.tv
/tsm_theoddone
/c
/2349361',
202 'only_matching
': True,
206 class TwitchVodIE(TwitchItemBaseIE):
207 IE_NAME = 'twitch
:vod
'
208 _VALID_URL = r'%s/[^
/]+/v
/(?P
<id>\d
+)' % TwitchBaseIE._VALID_URL_BASE
213 'url
': 'http
://www
.twitch
.tv
/riotgames
/v
/6528877?t
=5m10s
',
217 'title
': 'LCK Summer Split
- Week
6 Day
1',
218 'thumbnail
': 're
:^https?
://.*\
.jpg$
',
220 'timestamp
': 1435131709,
221 'upload_date
': '20150624',
222 'uploader
': 'Riot Games
',
223 'uploader_id
': 'riotgames
',
229 'skip_download
': True,
232 # Untitled broadcast (title is None)
233 'url
': 'http
://www
.twitch
.tv
/belkao_o
/v
/11230755',
237 'title
': 'Untitled Broadcast
',
238 'thumbnail
': 're
:^https?
://.*\
.jpg$
',
240 'timestamp
': 1439746708,
241 'upload_date
': '20150816',
242 'uploader
': 'BelkAO_o
',
243 'uploader_id
': 'belkao_o
',
248 'skip_download
': True,
250 'skip
': 'HTTP Error
404: Not Found
',
253 def _real_extract(self, url):
254 item_id = self._match_id(url)
256 info = self._download_info(self._ITEM_SHORTCUT, item_id)
257 access_token = self._call_api(
258 'api
/vods
/%s/access_token
' % item_id, item_id,
259 'Downloading
%s access token
' % self._ITEM_TYPE)
261 formats = self._extract_m3u8_formats(
263 self._USHER_BASE, item_id,
264 compat_urllib_parse_urlencode({
265 'allow_source
': 'true
',
266 'allow_audio_only
': 'true
',
267 'allow_spectre
': 'true
',
268 'player
': 'twitchweb
',
269 'nauth
': access_token['token
'],
270 'nauthsig
': access_token['sig
'],
272 item_id, 'mp4
', entry_protocol='m3u8_native
')
274 self._prefer_source(formats)
275 info['formats
'] = formats
277 parsed_url = compat_urllib_parse_urlparse(url)
278 query = compat_parse_qs(parsed_url.query)
280 info['start_time
'] = parse_duration(query['t
'][0])
285 class TwitchPlaylistBaseIE(TwitchBaseIE):
286 _PLAYLIST_PATH = 'kraken
/channels
/%s/videos
/?offset
=%d&limit
=%d'
289 def _extract_playlist(self, channel_id):
290 info = self._call_api(
291 'kraken
/channels
/%s' % channel_id,
292 channel_id, 'Downloading channel info JSON
')
293 channel_name = info.get('display_name
') or info.get('name
')
296 limit = self._PAGE_LIMIT
297 broken_paging_detected = False
298 counter_override = None
299 for counter in itertools.count(1):
300 response = self._call_api(
301 self._PLAYLIST_PATH % (channel_id, offset, limit),
303 'Downloading
%s videos JSON page
%s'
304 % (self._PLAYLIST_TYPE, counter_override or counter))
305 page_entries = self._extract_playlist_page(response)
308 total = int_or_none(response.get('_total
'))
309 # Since the beginning of March 2016 twitch's paging mechanism
310 # is completely broken on the twitch side. It simply ignores
311 # a limit and returns the whole offset number of videos.
312 # Working around by just requesting all videos at once.
313 # Upd: pagination bug was fixed by twitch on 15.03.2016.
314 if not broken_paging_detected
and total
and len(page_entries
) > limit
:
316 'Twitch pagination is broken on twitch side, requesting all videos at once',
318 broken_paging_detected
= True
320 counter_override
= '(all at once)'
322 entries
.extend(page_entries
)
323 if broken_paging_detected
or total
and len(page_entries
) >= total
:
326 return self
.playlist_result(
327 [self
.url_result(entry
) for entry
in orderedSet(entries
)],
328 channel_id
, channel_name
)
330 def _extract_playlist_page(self
, response
):
331 videos
= response
.get('videos')
332 return [video
['url'] for video
in videos
] if videos
else []
334 def _real_extract(self
, url
):
335 return self
._extract
_playlist
(self
._match
_id
(url
))
338 class TwitchProfileIE(TwitchPlaylistBaseIE
):
339 IE_NAME
= 'twitch:profile'
340 _VALID_URL
= r
'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
341 _PLAYLIST_TYPE
= 'profile'
344 'url': 'http://www.twitch.tv/vanillatv/profile',
347 'title': 'VanillaTV',
349 'playlist_mincount': 412,
353 class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE
):
354 IE_NAME
= 'twitch:past_broadcasts'
355 _VALID_URL
= r
'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
356 _PLAYLIST_PATH
= TwitchPlaylistBaseIE
._PLAYLIST
_PATH
+ '&broadcasts=true'
357 _PLAYLIST_TYPE
= 'past broadcasts'
360 'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
365 'playlist_mincount': 54,
369 class TwitchStreamIE(TwitchBaseIE
):
370 IE_NAME
= 'twitch:stream'
371 _VALID_URL
= r
'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
374 'url': 'http://www.twitch.tv/shroomztv',
377 'display_id': 'shroomztv',
379 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
380 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
382 'timestamp': 1421928037,
383 'upload_date': '20150122',
384 'uploader': 'ShroomzTV',
385 'uploader_id': 'shroomztv',
390 'skip_download': True,
393 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
394 'only_matching': True,
397 def _real_extract(self
, url
):
398 channel_id
= self
._match
_id
(url
)
400 stream
= self
._call
_api
(
401 'kraken/streams/%s?stream_type=all' % channel_id
, channel_id
,
402 'Downloading stream JSON').get('stream')
405 raise ExtractorError('%s is offline' % channel_id
, expected
=True)
407 # Channel name may be typed in a different case than the original channel name
408 # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON), which would lead to constructing
409 # an invalid m3u8 URL. Work around this by using the original channel name from the
410 # stream JSON, falling back to lowercase if it's not available.
411 channel_id
= stream
.get('channel', {}).get('name') or channel_id
.lower()
413 access_token
= self
._call
_api
(
414 'api/channels/%s/access_token' % channel_id
, channel_id
,
415 'Downloading channel access token')
418 'allow_source': 'true',
419 'allow_audio_only': 'true',
420 'allow_spectre': 'true',
421 'p': random
.randint(1000000, 10000000),
422 'player': 'twitchweb',
423 'segment_preference': '4',
424 'sig': access_token
['sig'].encode('utf-8'),
425 'token': access_token
['token'].encode('utf-8'),
427 formats
= self
._extract
_m
3u8_formats
(
428 '%s/api/channel/hls/%s.m3u8?%s'
429 % (self
._USHER
_BASE
, channel_id
, compat_urllib_parse_urlencode(query
)),
431 self
._prefer
_source
(formats
)
433 view_count
= stream
.get('viewers')
434 timestamp
= parse_iso8601(stream
.get('created_at'))
436 channel
= stream
['channel']
437 title
= self
._live
_title
(channel
.get('display_name') or channel
.get('name'))
438 description
= channel
.get('status')
441 for thumbnail_key
, thumbnail_url
in stream
['preview'].items():
442 m
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key
)
446 'url': thumbnail_url
,
447 'width': int(m
.group('width')),
448 'height': int(m
.group('height')),
452 'id': compat_str(stream
['_id']),
453 'display_id': channel_id
,
455 'description': description
,
456 'thumbnails': thumbnails
,
457 'uploader': channel
.get('display_name'),
458 'uploader_id': channel
.get('name'),
459 'timestamp': timestamp
,
460 'view_count': view_count
,
466 class TwitchClipsIE(InfoExtractor
):
467 IE_NAME
= 'twitch:clips'
468 _VALID_URL
= r
'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
471 'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
472 'md5': '761769e1eafce0ffebfb4089cb3847cd',
474 'id': 'AggressiveCobraPoooound',
476 'title': 'EA Play 2016 Live from the Novo Theatre',
477 'thumbnail': 're:^https?://.*\.jpg',
479 'uploader': 'stereotype_',
480 'uploader_id': 'stereotype_',
484 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
485 'only_matching': True,
488 def _real_extract(self
, url
):
489 video_id
= self
._match
_id
(url
)
491 webpage
= self
._download
_webpage
(url
, video_id
)
493 clip
= self
._parse
_json
(
495 r
'(?s)clipInfo\s*=\s*({.+?});', webpage
, 'clip info'),
496 video_id
, transform_source
=js_to_json
)
498 title
= clip
.get('channel_title') or self
._og
_search
_title
(webpage
)
501 'url': option
['source'],
502 'format_id': option
.get('quality'),
503 'height': int_or_none(option
.get('quality')),
504 } for option
in clip
.get('quality_options', []) if option
.get('source')]
508 'url': clip
['clip_video_url'],
511 self
._sort
_formats
(formats
)
516 'thumbnail': self
._og
_search
_thumbnail
(webpage
),
517 'creator': clip
.get('broadcaster_display_name') or clip
.get('broadcaster_login'),
518 'uploader': clip
.get('curator_login'),
519 'uploader_id': clip
.get('curator_display_name'),