# coding: utf-8
from __future__ import unicode_literals

import itertools
import re

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
)
from ..utils import (
    ExtractorError,
    parse_iso8601,
)
class TwitchIE(InfoExtractor):
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
    _PAGE_LIMIT = 100
    _API_BASE = 'https://api.twitch.tv'
    _LOGIN_URL = 'https://secure.twitch.tv/user/login'
    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/b/577357806',
        'info_dict': {
            'id': 'a577357806',
            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
        'info_dict': {
            'id': 'c5285812',
            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'http://www.twitch.tv/vanillatv',
        'info_dict': {
            'id': 'vanillatv',
            'title': 'VanillaTV',
        },
        'playlist_mincount': 412,
    }]
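
    # The Kraken API reports failures inside the JSON body rather than via
    # HTTP status alone, so every API response is routed through
    # _handle_error() below.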
    def _handle_error(self, response):
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
                expected=True)
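
    # Wrap InfoExtractor._download_json so every API call gets the same
    # error handling.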
    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
        response = super(TwitchIE, self)._download_json(url, video_id, note)
        self._handle_error(response)
        return response
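
    # 'item' is the one-letter media kind used in API paths: 'a' (archived
    # broadcasts, the /b/ URLs), 'v' (VODs, /v/) or 'c' (chapters, /c/);
    # ITEMS maps it to a human-readable name for log messages.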
    def _extract_media(self, item, item_id):
        ITEMS = {
            'a': 'video',
            'v': 'vod',
            'c': 'chapter',
        }
        info = self._extract_info(self._download_json(
            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s info JSON' % ITEMS[item]))
        if item == 'v':
            access_token = self._download_json(
                '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
                'Downloading %s access token' % ITEMS[item])
            formats = self._extract_m3u8_formats(
                'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
                % (item_id, access_token['token'], access_token['sig']),
                item_id)
            info['formats'] = formats
            return info
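
        # Older archives and chapters are instead split into numbered chunks,
        # one list per quality; the JSON shape assumed by the code below is
        # roughly: {'chunks': {'live': [{'url': ...}, ...], '240p': [...]}}.
        # zip(*chunks.values()) walks the n-th fragment of every quality in
        # lockstep, producing one playlist entry per fragment index.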
        response = self._download_json(
            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s playlist JSON' % ITEMS[item])
        entries = []
        chunks = response['chunks']
        qualities = list(chunks.keys())
        for num, fragment in enumerate(zip(*chunks.values()), start=1):
            formats = []
            for fmt_num, fragment_fmt in enumerate(fragment):
                format_id = qualities[fmt_num]
                fmt = {
                    'url': fragment_fmt['url'],
                    'format_id': format_id,
                    'quality': 1 if format_id == 'live' else 0,
                }
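                # Transcoded qualities have ids like '240p'/'720p', so the
                # frame height can be parsed out of them; the passthrough
                # quality is just called 'live' and carries no height.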
                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
                if m:
                    fmt['height'] = int(m.group('height'))
                formats.append(fmt)
            self._sort_formats(formats)
            entry = dict(info)
            entry['id'] = '%s_%d' % (entry['id'], num)
            entry['title'] = '%s part %d' % (entry['title'], num)
            entry['formats'] = formats
            entries.append(entry)
        return self.playlist_result(entries, info['id'], info['title'])
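
    # Map a Kraken video object onto youtube-dl's standard info dict fields.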
    def _extract_info(self, info):
        return {
            'id': info['_id'],
            'title': info['title'],
            'description': info['description'],
            'duration': info['length'],
            'thumbnail': info['preview'],
            'uploader': info['channel']['display_name'],
            'uploader_id': info['channel']['name'],
            'timestamp': parse_iso8601(info['recorded_at']),
            'view_count': info['views'],
        }
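
    # Logging in is optional: _get_login_info() reads credentials supplied
    # via --username/--password or .netrc and returns (None, None) when none
    # are configured.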
    def _real_initialize(self):
        self._login()

    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        authenticity_token = self._search_regex(
            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
            login_page, 'authenticity token')
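
        # Replay the hidden fields of the login form; authenticity_token is
        # the site's CSRF token scraped from the page above.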
        login_form = {
            'utf8': '✓'.encode('utf-8'),
            'authenticity_token': authenticity_token,
            'redirect_on_login': '',
            'embed_form': 'false',
            'mp_source_action': '',
            'follow': '',
            'user[login]': username,
            'user[password]': password,
        }

        request = compat_urllib_request.Request(
            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
        request.add_header('Referer', self._LOGIN_URL)
        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        m = re.search(
            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
        if m:
            raise ExtractorError(
                'Unable to login: %s' % m.group('msg').strip(), expected=True)
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj.group('chapterid'):
            return self._extract_media('c', mobj.group('chapterid'))

            # NOTE: the block below is legacy chapter extraction kept for
            # reference; it is unreachable after the return above and refers
            # to helpers (api_base, formatSeconds) no longer defined in this
            # module, so it is left inert inside a string literal.
            r"""
            chapter_id = mobj.group('chapterid')
            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if m is None:
                raise ExtractorError('Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            doc = self._download_xml(
                api, chapter_id,
                note='Downloading chapter information',
                errnote='Chapter information download failed')
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError('Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or 'flv'

            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info = self._download_json(
                chapter_api_url, 'c' + chapter_id,
                note='Downloading chapter metadata',
                errnote='Download of chapter metadata failed')

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            # video_url += '?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning(
                'Chapter detected, but we can just download the whole file. '
                'Chapter starts at %s and ends at %s'
                % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': 'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return info
            """
        elif mobj.group('videoid'):
            return self._extract_media('a', mobj.group('videoid'))
        elif mobj.group('vodid'):
            return self._extract_media('v', mobj.group('vodid'))
        elif mobj.group('channelid'):
            channel_id = mobj.group('channelid')
            info = self._download_json(
                '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
                channel_id, 'Downloading channel info JSON')
            channel_name = info.get('display_name') or info.get('name')
            entries = []
            offset = 0
            limit = self._PAGE_LIMIT
            for counter in itertools.count(1):
                response = self._download_json(
                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
                    % (self._API_BASE, channel_id, offset, limit),
                    channel_id, 'Downloading channel videos JSON page %d' % counter)
                videos = response['videos']
                if not videos:
                    break
                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
                offset += limit
            return self.playlist_result(entries, channel_id, channel_name)
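
# Typical invocations (assuming this extractor is registered in youtube-dl's
# extractor list as usual):
#   youtube-dl http://www.twitch.tv/vanillatv              # whole channel
#   youtube-dl http://www.twitch.tv/riotgames/b/577357806  # archived broadcast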