]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/justintv.py
27017e89f632880c21643c0b58f04d23e963fd39
   1 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  16 class JustinTVIE(InfoExtractor
): 
  17     """Information extractor for justin.tv and twitch.tv""" 
  18     # TODO: One broadcast may be split into multiple videos. The key 
  19     # 'broadcast_id' is the same for all parts, and 'broadcast_part' 
  20     # starts at 1 and increases. Can we treat all parts as one video? 
  22     _VALID_URL 
= r
"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/ 
  25             (?:(?:[^/]+)/b/(?P<videoid>[^/]+))| 
  26             (?:(?:[^/]+)/c/(?P<chapterid>[^/]+)) 
  30     _JUSTIN_PAGE_LIMIT 
= 100 
  32     IE_DESC 
= 'justin.tv and twitch.tv' 
  34         'url': 'http://www.twitch.tv/thegamedevhub/b/296128360', 
  35         'md5': 'ecaa8a790c22a40770901460af191c9a', 
  39             'upload_date': '20110927', 
  40             'uploader_id': 25114803, 
  41             'uploader': 'thegamedevhub', 
  42             'title': 'Beginner Series - Scripting With Python Pt.1' 
  46     # Return count of items, list of *valid* items 
  47     def _parse_page(self
, url
, video_id
, counter
): 
  48         info_json 
= self
._download
_webpage
( 
  50             'Downloading video info JSON on page %d' % counter
, 
  51             'Unable to download video info JSON %d' % counter
) 
  53         response 
= json
.loads(info_json
) 
  54         if type(response
) != list: 
  55             error_text 
= response
.get('error', 'unknown error') 
  56             raise ExtractorError('Justin.tv API: %s' % error_text
) 
  59             video_url 
= clip
['video_file_url'] 
  61                 video_extension 
= os
.path
.splitext(video_url
)[1][1:] 
  62                 video_date 
= re
.sub('-', '', clip
['start_time'][:10]) 
  63                 video_uploader_id 
= clip
.get('user_id', clip
.get('channel_id')) 
  65                 video_title 
= clip
.get('title', video_id
) 
  67                     'id': compat_str(video_id
), 
  70                     'uploader': clip
.get('channel_name', video_uploader_id
), 
  71                     'uploader_id': video_uploader_id
, 
  72                     'upload_date': video_date
, 
  73                     'ext': video_extension
, 
  75         return (len(response
), info
) 
  77     def _real_extract(self
, url
): 
  78         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  80         api_base 
= 'http://api.justin.tv' 
  82         if mobj
.group('channelid'): 
  84             video_id 
= mobj
.group('channelid') 
  85             api 
= api_base 
+ '/channel/archives/%s.json' % video_id
 
  86         elif mobj
.group('chapterid'): 
  87             chapter_id 
= mobj
.group('chapterid') 
  89             webpage 
= self
._download
_webpage
(url
, chapter_id
) 
  90             m 
= re
.search(r
'PP\.archive_id = "([0-9]+)";', webpage
) 
  92                 raise ExtractorError('Cannot find archive of a chapter') 
  93             archive_id 
= m
.group(1) 
  95             api 
= api_base 
+ '/broadcast/by_chapter/%s.xml' % chapter_id
 
  96             doc 
= self
._download
_xml
( 
  98                 note
='Downloading chapter information', 
  99                 errnote
='Chapter information download failed') 
 100             for a 
in doc
.findall('.//archive'): 
 101                 if archive_id 
== a
.find('./id').text
: 
 104                 raise ExtractorError('Could not find chapter in chapter information') 
 106             video_url 
= a
.find('./video_file_url').text
 
 107             video_ext 
= video_url
.rpartition('.')[2] or 'flv' 
 109             chapter_api_url 
= 'https://api.twitch.tv/kraken/videos/c' + chapter_id
 
 110             chapter_info 
= self
._download
_json
( 
 111                 chapter_api_url
, 'c' + chapter_id
, 
 112                 note
='Downloading chapter metadata', 
 113                 errnote
='Download of chapter metadata failed') 
 115             bracket_start 
= int(doc
.find('.//bracket_start').text
) 
 116             bracket_end 
= int(doc
.find('.//bracket_end').text
) 
 118             # TODO determine start (and probably fix up file) 
 119             #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 
 120             #video_url += '?start=' + TODO:start_timestamp 
 121             # bracket_start is 13290, but we want 51670615 
 122             self
._downloader
.report_warning('Chapter detected, but we can just download the whole file. ' 
 123                                             'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start
), formatSeconds(bracket_end
))) 
 126                 'id': 'c' + chapter_id
, 
 129                 'title': chapter_info
['title'], 
 130                 'thumbnail': chapter_info
['preview'], 
 131                 'description': chapter_info
['description'], 
 132                 'uploader': chapter_info
['channel']['display_name'], 
 133                 'uploader_id': chapter_info
['channel']['name'], 
 137             video_id 
= mobj
.group('videoid') 
 138             api 
= api_base 
+ '/broadcast/by_archive/%s.json' % video_id
 
 142         limit 
= self
._JUSTIN
_PAGE
_LIMIT
 
 143         for counter 
in itertools
.count(1): 
 144             page_url 
= api 
+ ('?offset=%d&limit=%d' % (offset
, limit
)) 
 145             page_count
, page_info 
= self
._parse
_page
( 
 146                 page_url
, video_id
, counter
) 
 147             entries
.extend(page_info
) 
 148             if not paged 
or page_count 
!= limit
: