from __future__ import unicode_literals

import itertools
import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
    parse_iso8601,
)
class TwitchIE(InfoExtractor):
    """Extractor for twitch.tv media.

    Handles four URL shapes:
      * /<channel>/v/<id>  -- VOD, served over HLS (m3u8 formats)
      * /<channel>/b/<id>  -- archived broadcast, split into parts
      * /<channel>/c/<id>  -- chapter (highlight) of a broadcast
      * /<channel>         -- every archived video of a channel (paginated)
    """
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
        (?:
            (?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))|
            (?P<channelid>[^/]+)
        )
        /?(?:\#.*)?$
        """
    _PAGE_LIMIT = 100  # videos per page when listing a whole channel
    _API_BASE = 'https://api.twitch.tv'
    _LOGIN_URL = 'https://secure.twitch.tv/user/login'
    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/b/577357806',
        'info_dict': {
            'id': 'a577357806',
            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
        'info_dict': {
            'id': 'c5285812',
            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'http://www.twitch.tv/vanillatv',
        'info_dict': {
            'id': 'vanillatv',
            'title': 'VanillaTV',
        },
        'playlist_mincount': 412,
    }]

    def _handle_error(self, response):
        """Raise ExtractorError if an API response carries an 'error' field.

        Non-dict responses (e.g. lists) are passed through untouched.
        """
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s - %s' % (
                    self.IE_NAME, error, response.get('message')),
                expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
        # Wrap the base implementation so every Twitch API call is
        # error-checked in one place.
        response = super(TwitchIE, self)._download_json(url, video_id, note)
        self._handle_error(response)
        return response

    def _extract_media(self, item, item_id):
        """Extract one media item.

        item is the one-letter kind prefix used by the Twitch API:
        'a' (archived broadcast), 'v' (VOD) or 'c' (chapter).
        Returns a single info dict for VODs, a playlist result otherwise.
        """
        ITEMS = {
            'a': 'video',
            'v': 'vod',
            'c': 'chapter',
        }
        info = self._extract_info(self._download_json(
            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s info JSON' % ITEMS[item]))

        if item == 'v':
            # VODs are HLS streams gated behind a signed access token.
            access_token = self._download_json(
                '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
                'Downloading %s access token' % ITEMS[item])
            formats = self._extract_m3u8_formats(
                'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
                % (item_id, access_token['token'], access_token['sig']),
                item_id)
            info['formats'] = formats
            return info

        # Archived broadcasts and chapters come back as 'chunks': a dict
        # mapping quality name -> list of fragments (one per part).
        response = self._download_json(
            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s playlist JSON' % ITEMS[item])
        entries = []
        chunks = response['chunks']
        qualities = list(chunks.keys())
        # zip(*chunks.values()) regroups per-quality lists into
        # per-part tuples of alternative formats.
        for num, fragment in enumerate(zip(*chunks.values()), start=1):
            formats = []
            for fmt_num, fragment_fmt in enumerate(fragment):
                format_id = qualities[fmt_num]
                fmt = {
                    'url': fragment_fmt['url'],
                    'format_id': format_id,
                    'quality': 1 if format_id == 'live' else 0,
                }
                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
                if m:
                    fmt['height'] = int(m.group('height'))
                formats.append(fmt)
            self._sort_formats(formats)
            entry = dict(info)
            entry['id'] = '%s_%d' % (entry['id'], num)
            entry['title'] = '%s part %d' % (entry['title'], num)
            entry['formats'] = formats
            entries.append(entry)
        return self.playlist_result(entries, info['id'], info['title'])

    def _extract_info(self, info):
        """Map a Twitch API video object to a youtube-dl info dict."""
        return {
            'id': info['_id'],
            'title': info['title'],
            'description': info['description'],
            'duration': info['length'],
            'thumbnail': info['preview'],
            'uploader': info['channel']['display_name'],
            'uploader_id': info['channel']['name'],
            'timestamp': parse_iso8601(info['recorded_at']),
            'view_count': info['views'],
        }

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with user-supplied credentials; no-op when none given.

        Raises ExtractorError (expected) when the site reports a login error.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        authenticity_token = self._search_regex(
            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
            login_page, 'authenticity token')

        login_form = {
            'utf8': '✓'.encode('utf-8'),
            'authenticity_token': authenticity_token,
            'redirect_on_login': '',
            'embed_form': 'false',
            'mp_source_action': '',
            'follow': '',
            'user[login]': username,
            'user[password]': password,
        }

        request = compat_urllib_request.Request(
            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
        request.add_header('Referer', self._LOGIN_URL)
        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        m = re.search(
            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
        if m:
            raise ExtractorError(
                'Unable to login: %s' % m.group('msg').strip(), expected=True)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj.group('chapterid'):
            return self._extract_media('c', mobj.group('chapterid'))

            # NOTE(review): everything up to the closing triple quote below is
            # unreachable legacy justin.tv chapter handling, kept for reference.
            """
            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if m is None:
                raise ExtractorError('Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            doc = self._download_xml(
                api, chapter_id,
                note='Downloading chapter information',
                errnote='Chapter information download failed')
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError('Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or 'flv'

            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info = self._download_json(
                chapter_api_url, 'c' + chapter_id,
                note='Downloading chapter metadata',
                errnote='Download of chapter metadata failed')

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += '?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': 'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return info
            """
        elif mobj.group('videoid'):
            return self._extract_media('a', mobj.group('videoid'))
        elif mobj.group('vodid'):
            return self._extract_media('v', mobj.group('vodid'))
        elif mobj.group('channelid'):
            # Whole channel: page through the archived-videos listing until
            # an empty page is returned.
            channel_id = mobj.group('channelid')
            info = self._download_json(
                '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
                channel_id, 'Downloading channel info JSON')
            channel_name = info.get('display_name') or info.get('name')
            entries = []
            offset = 0
            limit = self._PAGE_LIMIT
            for counter in itertools.count(1):
                response = self._download_json(
                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
                    % (self._API_BASE, channel_id, offset, limit),
                    channel_id, 'Downloading channel videos JSON page %d' % counter)
                videos = response['videos']
                if not videos:
                    break
                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
                offset += limit
            return self.playlist_result(entries, channel_id, channel_name)