# youtube_dl/extractor/justintv.py
import json
import os
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    formatSeconds,
)


class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?
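    # One possible direction for the TODO above (hypothetical sketch, not
    # called anywhere in this file; assumes each clip dict carries the
    # 'broadcast_id' and 'broadcast_part' fields described there):
    #
    #   import itertools
    #
    #   def _group_broadcast_parts(clips):
    #       ordered = sorted(clips,
    #                        key=lambda c: (c['broadcast_id'], c['broadcast_part']))
    #       return [list(parts) for _, parts in
    #               itertools.groupby(ordered, key=lambda c: c['broadcast_id'])]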
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
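    # URL forms accepted by _VALID_URL (the /b/ and /c/ examples come from
    # the test and comments in this file; the bare-channel form is implied by
    # the 'channelid' group and shown here with a placeholder name):
    #   http://www.twitch.tv/thegamedevhub/b/296128360  -> videoid
    #   http://www.twitch.tv/firmbelief/c/1757457       -> chapterid
    #   http://www.twitch.tv/somechannel                -> channelid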
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = u'justin.tv'
    _TEST = {
        u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
        u'file': u'296128360.flv',
        u'md5': u'ecaa8a790c22a40770901460af191c9a',
        u'info_dict': {
            u"upload_date": u"20110927",
            u"uploader_id": 25114803,
            u"uploader": u"thegamedevhub",
            u"title": u"Beginner Series - Scripting With Python Pt.1"
        }
    }

    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))

    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        info_json = self._download_webpage(url, video_id,
                                           u'Downloading video info JSON',
                                           u'unable to download video info JSON')

        response = json.loads(info_json)
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % error_text)
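        # Each element of a successful (list) response is assumed to carry at
        # least the fields read below; this sample shape is inferred from
        # those accesses, not from any API documentation:
        #
        #   {
        #       "id": 296128360,
        #       "title": "...",
        #       "video_file_url": "http://.../video.flv",
        #       "start_time": "2011-09-27T...",
        #       "user_id": 25114803,
        #       "channel_id": ...,
        #       "channel_name": "thegamedevhub"
        #   }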
        info = []
        for clip in response:
            video_url = clip['video_file_url']
            # Clips without a video file URL are skipped as invalid.
            if video_url:
                video_extension = os.path.splitext(video_url)[1][1:]
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
                    'id': video_id,
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
                    'ext': video_extension,
                })
        return (len(response), info)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        # Only whole-channel archives are paginated; single broadcasts and
        # chapters are fetched in one request.
        paged = False
        if mobj.group('channelid'):
            paged = True
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            chapter_id = mobj.group('chapterid')

            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if m is None:
                raise ExtractorError(u'Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            chapter_info_xml = self._download_webpage(api, chapter_id,
                                             note=u'Downloading chapter information',
                                             errnote=u'Chapter information download failed')
            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
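            # Assumed shape of the chapter XML, inferred from the element
            # lookups below rather than from API documentation:
            #
            #   <archives>
            #     <archive>
            #       <id>...</id>
            #       <video_file_url>...</video_file_url>
            #       <bracket_start>...</bracket_start>
            #       <bracket_end>...</bracket_end>
            #     </archive>
            #   </archives>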
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError(u'Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or u'flv'

            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
                                   note='Downloading chapter metadata',
                                   errnote='Download of chapter metadata failed')
            chapter_info = json.loads(chapter_info_json)

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += u'?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': u'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return info
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        self.report_extraction(video_id)

        # Paginate through the archive listing; a page shorter than the
        # limit means we have reached the end.
        info = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
            if paged:
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
            info.extend(page_info)
            if not paged or page_count != limit:
                break
            offset += limit
        return info
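
# Minimal usage sketch (illustrative only; extractors are normally registered
# automatically and driven through YoutubeDL.extract_info):
#
#   from youtube_dl import YoutubeDL
#
#   ydl = YoutubeDL()
#   result = ydl.extract_info(
#       'http://www.twitch.tv/thegamedevhub/b/296128360', download=False)
#   # For this extractor, `result` reflects the list of per-video info
#   # dicts built above.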