]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitch.py
36aa1ad6ec578859d90c947dee9c39213dcfda59
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  13 class TwitchIE(InfoExtractor
): 
  14     # TODO: One broadcast may be split into multiple videos. The key 
  15     # 'broadcast_id' is the same for all parts, and 'broadcast_part' 
  16     # starts at 1 and increases. Can we treat all parts as one video? 
  17     _VALID_URL 
= r
"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/ 
  20             (?:(?:[^/]+)/b/(?P<videoid>[^/]+))| 
  21             (?:(?:[^/]+)/c/(?P<chapterid>[^/]+)) 
  26     _API_BASE 
= 'https://api.twitch.tv' 
  28         'url': 'http://www.twitch.tv/riotgames/b/577357806', 
  31             'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG', 
  33         'playlist_mincount': 12, 
  35         'url': 'http://www.twitch.tv/acracingleague/c/5285812', 
  38             'title': 'ACRL Off Season - Sports Cars @ Nordschleife', 
  40         'playlist_mincount': 3, 
  42         'url': 'http://www.twitch.tv/vanillatv', 
  47         'playlist_mincount': 412, 
  50     def _handle_error(self
, response
): 
  51         if not isinstance(response
, dict): 
  53         error 
= response
.get('error') 
  56                 '%s returned error: %s - %s' % (self
.IE_NAME
, error
, response
.get('message')), 
  59     def _download_json(self
, url
, video_id
, note
='Downloading JSON metadata'): 
  60         response 
= super(TwitchIE
, self
)._download
_json
(url
, video_id
, note
) 
  61         self
._handle
_error
(response
) 
  64     def _extract_media(self
, item
, item_id
): 
  69         info 
= self
._extract
_info
(self
._download
_json
( 
  70             '%s/kraken/videos/%s%s' % (self
._API
_BASE
, item
, item_id
), item_id
, 
  71             'Downloading %s info JSON' % ITEMS
[item
])) 
  72         response 
= self
._download
_json
( 
  73             '%s/api/videos/%s%s' % (self
._API
_BASE
, item
, item_id
), item_id
, 
  74             'Downloading %s playlist JSON' % ITEMS
[item
]) 
  76         chunks 
= response
['chunks'] 
  77         qualities 
= list(chunks
.keys()) 
  78         for num
, fragment 
in enumerate(zip(*chunks
.values()), start
=1): 
  80             for fmt_num
, fragment_fmt 
in enumerate(fragment
): 
  81                 format_id 
= qualities
[fmt_num
] 
  83                     'url': fragment_fmt
['url'], 
  84                     'format_id': format_id
, 
  85                     'quality': 1 if format_id 
== 'live' else 0, 
  87                 m 
= re
.search(r
'^(?P<height>\d+)[Pp]', format_id
) 
  89                     fmt
['height'] = int(m
.group('height')) 
  91             self
._sort
_formats
(formats
) 
  93             entry
['id'] = '%s_%d' % (entry
['id'], num
) 
  94             entry
['title'] = '%s part %d' % (entry
['title'], num
) 
  95             entry
['formats'] = formats
 
  97         return self
.playlist_result(entries
, info
['id'], info
['title']) 
  99     def _extract_info(self
, info
): 
 102             'title': info
['title'], 
 103             'description': info
['description'], 
 104             'duration': info
['length'], 
 105             'thumbnail': info
['preview'], 
 106             'uploader': info
['channel']['display_name'], 
 107             'uploader_id': info
['channel']['name'], 
 108             'timestamp': parse_iso8601(info
['recorded_at']), 
 109             'view_count': info
['views'], 
 112     def _real_extract(self
, url
): 
 113         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 114         if mobj
.group('chapterid'): 
 115             return self
._extract
_media
('c', mobj
.group('chapterid')) 
 118             webpage = self._download_webpage(url, chapter_id) 
 119             m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage) 
 121                 raise ExtractorError('Cannot find archive of a chapter') 
 122             archive_id = m.group(1) 
 124             api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id 
 125             doc = self._download_xml( 
 127                 note='Downloading chapter information', 
 128                 errnote='Chapter information download failed') 
 129             for a in doc.findall('.//archive'): 
 130                 if archive_id == a.find('./id').text: 
 133                 raise ExtractorError('Could not find chapter in chapter information') 
 135             video_url = a.find('./video_file_url').text 
 136             video_ext = video_url.rpartition('.')[2] or 'flv' 
 138             chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id 
 139             chapter_info = self._download_json( 
 140                 chapter_api_url, 'c' + chapter_id, 
 141                 note='Downloading chapter metadata', 
 142                 errnote='Download of chapter metadata failed') 
 144             bracket_start = int(doc.find('.//bracket_start').text) 
 145             bracket_end = int(doc.find('.//bracket_end').text) 
 147             # TODO determine start (and probably fix up file) 
 148             #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 
 149             #video_url += '?start=' + TODO:start_timestamp 
 150             # bracket_start is 13290, but we want 51670615 
 151             self._downloader.report_warning('Chapter detected, but we can just download the whole file. ' 
 152                                             'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end))) 
 155                 'id': 'c' + chapter_id, 
 158                 'title': chapter_info['title'], 
 159                 'thumbnail': chapter_info['preview'], 
 160                 'description': chapter_info['description'], 
 161                 'uploader': chapter_info['channel']['display_name'], 
 162                 'uploader_id': chapter_info['channel']['name'], 
 166         elif mobj
.group('videoid'): 
 167             return self
._extract
_media
('a', mobj
.group('videoid')) 
 168         elif mobj
.group('channelid'): 
 169             channel_id 
= mobj
.group('channelid') 
 170             info 
= self
._download
_json
( 
 171                 '%s/kraken/channels/%s' % (self
._API
_BASE
, channel_id
), 
 172                 channel_id
, 'Downloading channel info JSON') 
 173             channel_name 
= info
.get('display_name') or info
.get('name') 
 176             limit 
= self
._PAGE
_LIMIT
 
 177             for counter 
in itertools
.count(1): 
 178                 response 
= self
._download
_json
( 
 179                     '%s/kraken/channels/%s/videos/?offset=%d&limit=%d' 
 180                     % (self
._API
_BASE
, channel_id
, offset
, limit
), 
 181                     channel_id
, 'Downloading channel videos JSON page %d' % counter
) 
 182                 videos 
= response
['videos'] 
 185                 entries
.extend([self
.url_result(video
['url'], 'Twitch') for video 
in videos
]) 
 187             return self
.playlist_result(entries
, channel_id
, channel_name
)