]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/justintv.py
7083db12ea012720f5dfda7039fdad9e21c12cc9
   1 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  15 class JustinTVIE(InfoExtractor
): 
  16     """Information extractor for justin.tv and twitch.tv""" 
  17     # TODO: One broadcast may be split into multiple videos. The key 
  18     # 'broadcast_id' is the same for all parts, and 'broadcast_part' 
  19     # starts at 1 and increases. Can we treat all parts as one video? 
  21     _VALID_URL 
= r
"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/ 
  24             (?:(?:[^/]+)/b/(?P<videoid>[^/]+))| 
  25             (?:(?:[^/]+)/c/(?P<chapterid>[^/]+)) 
  29     _JUSTIN_PAGE_LIMIT 
= 100 
  31     IE_DESC 
= 'justin.tv and twitch.tv' 
  33         'url': 'http://www.twitch.tv/thegamedevhub/b/296128360', 
  34         'md5': 'ecaa8a790c22a40770901460af191c9a', 
  38             'upload_date': '20110927', 
  39             'uploader_id': 25114803, 
  40             'uploader': 'thegamedevhub', 
  41             'title': 'Beginner Series - Scripting With Python Pt.1' 
  45     # Return count of items, list of *valid* items 
  46     def _parse_page(self
, url
, video_id
): 
        # Download one page of the API listing at `url`, decode it as JSON and
        # return a 2-tuple: (number of items in the decoded response, `info`).
        #
        # Raises ExtractorError when the decoded response is not a list; the
        # message uses the response's 'error' entry, or 'unknown error' when
        # that entry is absent.
        #
        # NOTE(review): this excerpt is missing several original lines (the
        # embedded numbering skips 55-56, 58, 62, 64, 66-67 and 72). The
        # statements that bind `clip` and `info`, and the opening of the
        # per-clip dict, are therefore not visible here -- confirm against
        # the complete file before changing anything below.
  47         info_json 
= self
._download
_webpage
(url
, video_id
, 
  48                                            'Downloading video info JSON', 
  49                                            'unable to download video info JSON') 
  51         response 
= json
.loads(info_json
) 
        # A non-list response is treated as an API error payload.
  52         if type(response
) != list: 
  53             error_text 
= response
.get('error', 'unknown error') 
  54             raise ExtractorError('Justin.tv API: %s' % error_text
) 
        # Per-clip fields: the extension is the URL suffix without the dot,
        # the date is the first 10 characters of 'start_time' with '-'
        # removed, and the uploader id falls back from 'user_id' to
        # 'channel_id'.
        # NOTE(review): presumably this runs once per element of `response`;
        # the loop header is not visible in this excerpt.
  57             video_url 
= clip
['video_file_url'] 
  59                 video_extension 
= os
.path
.splitext(video_url
)[1][1:] 
  60                 video_date 
= re
.sub('-', '', clip
['start_time'][:10]) 
  61                 video_uploader_id 
= clip
.get('user_id', clip
.get('channel_id')) 
  63                 video_title 
= clip
.get('title', video_id
) 
  65                     'id': compat_str(video_id
), 
  68                     'uploader': clip
.get('channel_name', video_uploader_id
), 
  69                     'uploader_id': video_uploader_id
, 
  70                     'upload_date': video_date
, 
  71                     'ext': video_extension
, 
        # The count is the length of the raw response, not the length of `info`.
  73         return (len(response
), info
) 
  75     def _real_extract(self
, url
): 
  76         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  78         api_base 
= 'http://api.justin.tv' 
  80         if mobj
.group('channelid'): 
  82             video_id 
= mobj
.group('channelid') 
  83             api 
= api_base 
+ '/channel/archives/%s.json' % video_id
 
  84         elif mobj
.group('chapterid'): 
  85             chapter_id 
= mobj
.group('chapterid') 
  87             webpage 
= self
._download
_webpage
(url
, chapter_id
) 
  88             m 
= re
.search(r
'PP\.archive_id = "([0-9]+)";', webpage
) 
  90                 raise ExtractorError('Cannot find archive of a chapter') 
  91             archive_id 
= m
.group(1) 
  93             api 
= api_base 
+ '/broadcast/by_chapter/%s.xml' % chapter_id
 
  94             doc 
= self
._download
_xml
( 
  96                 note
='Downloading chapter information', 
  97                 errnote
='Chapter information download failed') 
  98             for a 
in doc
.findall('.//archive'): 
  99                 if archive_id 
== a
.find('./id').text
: 
 102                 raise ExtractorError('Could not find chapter in chapter information') 
 104             video_url 
= a
.find('./video_file_url').text
 
 105             video_ext 
= video_url
.rpartition('.')[2] or 'flv' 
 107             chapter_api_url 
= 'https://api.twitch.tv/kraken/videos/c' + chapter_id
 
 108             chapter_info 
= self
._download
_json
( 
 109                 chapter_api_url
, 'c' + chapter_id
, 
 110                 note
='Downloading chapter metadata', 
 111                 errnote
='Download of chapter metadata failed') 
 113             bracket_start 
= int(doc
.find('.//bracket_start').text
) 
 114             bracket_end 
= int(doc
.find('.//bracket_end').text
) 
 116             # TODO determine start (and probably fix up file) 
 117             #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 
 118             #video_url += '?start=' + TODO:start_timestamp 
 119             # bracket_start is 13290, but we want 51670615 
 120             self
._downloader
.report_warning('Chapter detected, but we can just download the whole file. ' 
 121                                             'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start
), formatSeconds(bracket_end
))) 
 124                 'id': 'c' + chapter_id
, 
 127                 'title': chapter_info
['title'], 
 128                 'thumbnail': chapter_info
['preview'], 
 129                 'description': chapter_info
['description'], 
 130                 'uploader': chapter_info
['channel']['display_name'], 
 131                 'uploader_id': chapter_info
['channel']['name'], 
 135             video_id 
= mobj
.group('videoid') 
 136             api 
= api_base 
+ '/broadcast/by_archive/%s.json' % video_id
 
 140         limit 
= self
._JUSTIN
_PAGE
_LIMIT
 
 143                 self
.report_download_page(video_id
, offset
) 
 144             page_url 
= api 
+ ('?offset=%d&limit=%d' % (offset
, limit
)) 
 145             page_count
, page_info 
= self
._parse
_page
(page_url
, video_id
) 
 146             entries
.extend(page_info
) 
 147             if not paged 
or page_count 
!= limit
: