import json
import os
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    formatSeconds,
)
 
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
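    # Hypothetical example URLs for the groups above (the bare channel form is an
    # assumption inferred from the 'channelid' check in _real_extract below):
    #   http://www.twitch.tv/somechannel           -> 'channelid'
    #   http://www.twitch.tv/somechannel/b/123456  -> 'videoid'
    #   http://www.twitch.tv/firmbelief/c/1757457  -> 'chapterid'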
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = u'justin.tv'
    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        info_json = self._download_webpage(url, video_id,
                                           u'Downloading video info JSON',
                                           u'unable to download video info JSON')

        response = json.loads(info_json)
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
            if video_url:
                video_extension = os.path.splitext(video_url)[1][1:]
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
                    'id': video_id,
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
                    'ext': video_extension,
                })
        return (len(response), info)
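
    # Illustrative note: _parse_page assumes the Justin.tv archive API returns a JSON
    # list of clip objects carrying at least the fields read above. A hypothetical
    # clip entry might look like:
    #   {"id": 1234567, "title": "Some broadcast", "video_file_url": "http://.../x.flv",
    #    "start_time": "2013-01-01T00:00:00Z", "channel_name": "somechannel",
    #    "channel_id": 42}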

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        paged = False
        if mobj.group('channelid'):
            paged = True
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            chapter_id = mobj.group('chapterid')

            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if m is None:
                raise ExtractorError(u'Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            chapter_info_xml = self._download_webpage(api, chapter_id,
                                             note=u'Downloading chapter information',
                                             errnote=u'Chapter information download failed')
            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError(u'Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or u'flv'

            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
                                   note='Downloading chapter metadata',
                                   errnote='Download of chapter metadata failed')
            chapter_info = json.loads(chapter_info_json)

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += u'?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': u'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return [info]
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        self.report_extraction(video_id)

        info = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
            if paged:
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
            info.extend(page_info)
            if not paged or page_count != limit:
                break
            offset += limit
        return info
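
# Illustrative usage sketch (the YoutubeDL calls below are assumed from youtube-dl's
# public API of the same era, not defined in this module): the extractor is normally
# reached through youtube-dl's URL dispatch, e.g. from the command line:
#   youtube-dl http://www.twitch.tv/firmbelief/c/1757457
# or programmatically:
#   import youtube_dl
#   ydl = youtube_dl.YoutubeDL({'skip_download': True})
#   info = ydl.extract_info('http://www.twitch.tv/firmbelief/c/1757457', download=False)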