]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitch.py
2 from __future__
import unicode_literals
8 from .common
import InfoExtractor
12 compat_urllib_request
,
20 class TwitchBaseIE(InfoExtractor
):
21 _VALID_URL_BASE
= r
'https?://(?:www\.)?twitch\.tv'
23 _API_BASE
= 'https://api.twitch.tv'
24 _USHER_BASE
= 'http://usher.twitch.tv'
25 _LOGIN_URL
= 'https://secure.twitch.tv/login'
26 _LOGIN_POST_URL
= 'https://passport.twitch.tv/authorize'
27 _NETRC_MACHINE
= 'twitch'
29 def _handle_error(self
, response
):
30 if not isinstance(response
, dict):
32 error
= response
.get('error')
35 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')),
38 def _download_json(self
, url
, video_id
, note
='Downloading JSON metadata'):
40 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
41 'X-Requested-With': 'XMLHttpRequest',
43 for cookie
in self
._downloader
.cookiejar
:
44 if cookie
.name
== 'api_token':
45 headers
['Twitch-Api-Token'] = cookie
.value
46 request
= compat_urllib_request
.Request(url
, headers
=headers
)
47 response
= super(TwitchBaseIE
, self
)._download
_json
(request
, video_id
, note
)
48 self
._handle
_error
(response
)
51 def _real_initialize(self
):
55 (username
, password
) = self
._get
_login
_info
()
59 login_page
= self
._download
_webpage
(
60 self
._LOGIN
_URL
, None, 'Downloading login page')
62 login_form
= self
._hidden
_inputs
(login_page
)
65 'login': username
.encode('utf-8'),
66 'password': password
.encode('utf-8'),
69 request
= compat_urllib_request
.Request(
70 self
._LOGIN
_POST
_URL
, compat_urllib_parse
.urlencode(login_form
).encode('utf-8'))
71 request
.add_header('Referer', self
._LOGIN
_URL
)
72 response
= self
._download
_webpage
(
73 request
, None, 'Logging in as %s' % username
)
75 error_message
= self
._search
_regex
(
76 r
'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>',
77 response
, 'error message', default
=None)
80 'Unable to login. Twitch said: %s' % error_message
, expected
=True)
82 if '>Reset your password<' in response
:
83 self
.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')
85 def _prefer_source(self
, formats
):
87 source
= next(f
for f
in formats
if f
['format_id'] == 'Source')
88 source
['preference'] = 10
90 pass # No Source stream present
91 self
._sort
_formats
(formats
)
94 class TwitchItemBaseIE(TwitchBaseIE
):
95 def _download_info(self
, item
, item_id
):
96 return self
._extract
_info
(self
._download
_json
(
97 '%s/kraken/videos/%s%s' % (self
._API
_BASE
, item
, item_id
), item_id
,
98 'Downloading %s info JSON' % self
._ITEM
_TYPE
))
100 def _extract_media(self
, item_id
):
101 info
= self
._download
_info
(self
._ITEM
_SHORTCUT
, item_id
)
102 response
= self
._download
_json
(
103 '%s/api/videos/%s%s' % (self
._API
_BASE
, self
._ITEM
_SHORTCUT
, item_id
), item_id
,
104 'Downloading %s playlist JSON' % self
._ITEM
_TYPE
)
106 chunks
= response
['chunks']
107 qualities
= list(chunks
.keys())
108 for num
, fragment
in enumerate(zip(*chunks
.values()), start
=1):
110 for fmt_num
, fragment_fmt
in enumerate(fragment
):
111 format_id
= qualities
[fmt_num
]
113 'url': fragment_fmt
['url'],
114 'format_id': format_id
,
115 'quality': 1 if format_id
== 'live' else 0,
117 m
= re
.search(r
'^(?P<height>\d+)[Pp]', format_id
)
119 fmt
['height'] = int(m
.group('height'))
121 self
._sort
_formats
(formats
)
123 entry
['id'] = '%s_%d' % (entry
['id'], num
)
124 entry
['title'] = '%s part %d' % (entry
['title'], num
)
125 entry
['formats'] = formats
126 entries
.append(entry
)
127 return self
.playlist_result(entries
, info
['id'], info
['title'])
129 def _extract_info(self
, info
):
132 'title': info
['title'],
133 'description': info
['description'],
134 'duration': info
['length'],
135 'thumbnail': info
['preview'],
136 'uploader': info
['channel']['display_name'],
137 'uploader_id': info
['channel']['name'],
138 'timestamp': parse_iso8601(info
['recorded_at']),
139 'view_count': info
['views'],
142 def _real_extract(self
, url
):
143 return self
._extract
_media
(self
._match
_id
(url
))
146 class TwitchVideoIE(TwitchItemBaseIE
):
147 IE_NAME
= 'twitch:video'
148 _VALID_URL
= r
'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE
._VALID
_URL
_BASE
153 'url': 'http://www.twitch.tv/riotgames/b/577357806',
156 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
158 'playlist_mincount': 12,
162 class TwitchChapterIE(TwitchItemBaseIE
):
163 IE_NAME
= 'twitch:chapter'
164 _VALID_URL
= r
'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE
._VALID
_URL
_BASE
165 _ITEM_TYPE
= 'chapter'
169 'url': 'http://www.twitch.tv/acracingleague/c/5285812',
172 'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
174 'playlist_mincount': 3,
176 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
177 'only_matching': True,
181 class TwitchVodIE(TwitchItemBaseIE
):
182 IE_NAME
= 'twitch:vod'
183 _VALID_URL
= r
'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE
._VALID
_URL
_BASE
188 'url': 'http://www.twitch.tv/riotgames/v/6528877',
192 'title': 'LCK Summer Split - Week 6 Day 1',
193 'thumbnail': 're:^https?://.*\.jpg$',
195 'timestamp': 1435131709,
196 'upload_date': '20150624',
197 'uploader': 'Riot Games',
198 'uploader_id': 'riotgames',
203 'skip_download': True,
207 def _real_extract(self
, url
):
208 item_id
= self
._match
_id
(url
)
209 info
= self
._download
_info
(self
._ITEM
_SHORTCUT
, item_id
)
210 access_token
= self
._download
_json
(
211 '%s/api/vods/%s/access_token' % (self
._API
_BASE
, item_id
), item_id
,
212 'Downloading %s access token' % self
._ITEM
_TYPE
)
213 formats
= self
._extract
_m
3u8_formats
(
214 '%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true'
215 % (self
._USHER
_BASE
, item_id
, access_token
['token'], access_token
['sig']),
217 self
._prefer
_source
(formats
)
218 info
['formats'] = formats
222 class TwitchPlaylistBaseIE(TwitchBaseIE
):
223 _PLAYLIST_URL
= '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE
._API
_BASE
226 def _extract_playlist(self
, channel_id
):
227 info
= self
._download
_json
(
228 '%s/kraken/channels/%s' % (self
._API
_BASE
, channel_id
),
229 channel_id
, 'Downloading channel info JSON')
230 channel_name
= info
.get('display_name') or info
.get('name')
233 limit
= self
._PAGE
_LIMIT
234 for counter
in itertools
.count(1):
235 response
= self
._download
_json
(
236 self
._PLAYLIST
_URL
% (channel_id
, offset
, limit
),
237 channel_id
, 'Downloading %s videos JSON page %d' % (self
._PLAYLIST
_TYPE
, counter
))
238 page_entries
= self
._extract
_playlist
_page
(response
)
241 entries
.extend(page_entries
)
243 return self
.playlist_result(
244 [self
.url_result(entry
) for entry
in set(entries
)],
245 channel_id
, channel_name
)
247 def _extract_playlist_page(self
, response
):
248 videos
= response
.get('videos')
249 return [video
['url'] for video
in videos
] if videos
else []
251 def _real_extract(self
, url
):
252 return self
._extract
_playlist
(self
._match
_id
(url
))
255 class TwitchProfileIE(TwitchPlaylistBaseIE
):
256 IE_NAME
= 'twitch:profile'
257 _VALID_URL
= r
'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
258 _PLAYLIST_TYPE
= 'profile'
261 'url': 'http://www.twitch.tv/vanillatv/profile',
264 'title': 'VanillaTV',
266 'playlist_mincount': 412,
270 class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE
):
271 IE_NAME
= 'twitch:past_broadcasts'
272 _VALID_URL
= r
'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
273 _PLAYLIST_URL
= TwitchPlaylistBaseIE
._PLAYLIST
_URL
+ '&broadcasts=true'
274 _PLAYLIST_TYPE
= 'past broadcasts'
277 'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
282 'playlist_mincount': 54,
286 class TwitchBookmarksIE(TwitchPlaylistBaseIE
):
287 IE_NAME
= 'twitch:bookmarks'
288 _VALID_URL
= r
'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
289 _PLAYLIST_URL
= '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE
._API
_BASE
290 _PLAYLIST_TYPE
= 'bookmarks'
293 'url': 'http://www.twitch.tv/ognos/profile/bookmarks',
298 'playlist_mincount': 3,
301 def _extract_playlist_page(self
, response
):
303 for bookmark
in response
.get('bookmarks', []):
304 video
= bookmark
.get('video')
307 entries
.append(video
['url'])
311 class TwitchStreamIE(TwitchBaseIE
):
312 IE_NAME
= 'twitch:stream'
313 _VALID_URL
= r
'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE
._VALID
_URL
_BASE
316 'url': 'http://www.twitch.tv/shroomztv',
319 'display_id': 'shroomztv',
321 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
322 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
324 'timestamp': 1421928037,
325 'upload_date': '20150122',
326 'uploader': 'ShroomzTV',
327 'uploader_id': 'shroomztv',
332 'skip_download': True,
335 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
336 'only_matching': True,
339 def _real_extract(self
, url
):
340 channel_id
= self
._match
_id
(url
)
342 stream
= self
._download
_json
(
343 '%s/kraken/streams/%s' % (self
._API
_BASE
, channel_id
), channel_id
,
344 'Downloading stream JSON').get('stream')
346 # Fallback on profile extraction if stream is offline
348 return self
.url_result(
349 'http://www.twitch.tv/%s/profile' % channel_id
,
350 'TwitchProfile', channel_id
)
352 # Channel name may be typed if different case than the original channel name
353 # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
354 # an invalid m3u8 URL. Working around by use of original channel name from stream
355 # JSON and fallback to lowercase if it's not available.
356 channel_id
= stream
.get('channel', {}).get('name') or channel_id
.lower()
358 access_token
= self
._download
_json
(
359 '%s/api/channels/%s/access_token' % (self
._API
_BASE
, channel_id
), channel_id
,
360 'Downloading channel access token')
363 'allow_source': 'true',
364 'p': random
.randint(1000000, 10000000),
365 'player': 'twitchweb',
366 'segment_preference': '4',
367 'sig': access_token
['sig'].encode('utf-8'),
368 'token': access_token
['token'].encode('utf-8'),
370 formats
= self
._extract
_m
3u8_formats
(
371 '%s/api/channel/hls/%s.m3u8?%s'
372 % (self
._USHER
_BASE
, channel_id
, compat_urllib_parse
.urlencode(query
)),
374 self
._prefer
_source
(formats
)
376 view_count
= stream
.get('viewers')
377 timestamp
= parse_iso8601(stream
.get('created_at'))
379 channel
= stream
['channel']
380 title
= self
._live
_title
(channel
.get('display_name') or channel
.get('name'))
381 description
= channel
.get('status')
384 for thumbnail_key
, thumbnail_url
in stream
['preview'].items():
385 m
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key
)
389 'url': thumbnail_url
,
390 'width': int(m
.group('width')),
391 'height': int(m
.group('height')),
395 'id': compat_str(stream
['_id']),
396 'display_id': channel_id
,
398 'description': description
,
399 'thumbnails': thumbnails
,
400 'uploader': channel
.get('display_name'),
401 'uploader_id': channel
.get('name'),
402 'timestamp': timestamp
,
403 'view_count': view_count
,