]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bilibili.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_str
 
  16 class BiliBiliIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' 
  20         'url': 'http://www.bilibili.tv/video/av1074402/', 
  21         'md5': '2c301e4dab317596e837c3e7633e7d86', 
  27             'upload_date': '20140420', 
  28             'thumbnail': 're:^https?://.+\.jpg', 
  29             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', 
  30             'timestamp': 1397983878, 
  34         'url': 'http://www.bilibili.com/video/av1041170/', 
  37             'title': '【BD1080P】刀语【诸神&异域】', 
  38             'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', 
  40             'timestamp': 1396501299, 
  45     def _real_extract(self
, url
): 
  46         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  47         video_id 
= mobj
.group('id') 
  48         page_num 
= mobj
.group('page_num') or '1' 
  50         view_data 
= self
._download
_json
( 
  51             'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id
, page_num
), 
  53         if 'error' in view_data
: 
  54             raise ExtractorError('%s said: %s' % (self
.IE_NAME
, view_data
['error']), expected
=True) 
  56         cid 
= view_data
['cid'] 
  57         title 
= unescapeHTML(view_data
['title']) 
  59         doc 
= self
._download
_xml
( 
  60             'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid
, 
  62             'Downloading page %s/%s' % (page_num
, view_data
['pages']) 
  65         if xpath_text(doc
, './result') == 'error': 
  66             raise ExtractorError('%s said: %s' % (self
.IE_NAME
, xpath_text(doc
, './message')), expected
=True) 
  70         for durl 
in doc
.findall('./durl'): 
  71             size 
= xpath_text(durl
, ['./filesize', './size']) 
  73                 'url': durl
.find('./url').text
, 
  74                 'filesize': int_or_none(size
), 
  77             backup_urls 
= durl
.find('./backup_url') 
  78             if backup_urls 
is not None: 
  79                 for backup_url 
in backup_urls
.findall('./url'): 
  80                     formats
.append({'url': backup_url
.text
}) 
  84                 'id': '%s_part%s' % (cid
, xpath_text(durl
, './order')), 
  86                 'duration': int_or_none(xpath_text(durl
, './length'), 1000), 
  91             'id': compat_str(cid
), 
  93             'description': view_data
.get('description'), 
  94             'thumbnail': view_data
.get('pic'), 
  95             'uploader': view_data
.get('author'), 
  96             'timestamp': int_or_none(view_data
.get('created')), 
  97             'view_count': int_or_none(view_data
.get('play')), 
  98             'duration': int_or_none(xpath_text(doc
, './timelength')), 
 101         if len(entries
) == 1: 
 102             entries
[0].update(info
) 
 106                 '_type': 'multi_video',