]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bilibili.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
   8 from ..compat 
import compat_parse_qs
 
  17 class BiliBiliIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)' 
  21         'url': 'http://www.bilibili.tv/video/av1074402/', 
  22         'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 
  27             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', 
  29             'timestamp': 1398012660, 
  30             'upload_date': '20140420', 
  31             'thumbnail': 're:^https?://.+\.jpg', 
  33             'uploader_id': '156160', 
  37     _APP_KEY 
= '6f90a59ac58a4123' 
  38     _BILIBILI_KEY 
= '0bfd84cc3940035173f35e6777508326' 
  40     def _real_extract(self
, url
): 
  41         video_id 
= self
._match
_id
(url
) 
  42         webpage 
= self
._download
_webpage
(url
, video_id
) 
  44         if 'anime/v' not in url
: 
  45             cid 
= compat_parse_qs(self
._search
_regex
( 
  46                 [r
'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', 
  47                  r
'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], 
  48                 webpage
, 'player parameters'))['cid'][0] 
  50             js 
= self
._download
_json
( 
  51                 'http://bangumi.bilibili.com/web_api/get_source', video_id
, 
  52                 data
=urlencode_postdata({'episode_id': video_id
}), 
  53                 headers
={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}) 
  54             cid 
= js
['result']['cid'] 
  56         payload 
= 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self
._APP
_KEY
, cid
) 
  57         sign 
= hashlib
.md5((payload 
+ self
._BILIBILI
_KEY
).encode('utf-8')).hexdigest() 
  59         video_info 
= self
._download
_json
( 
  60             'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload
, sign
), 
  61             video_id
, note
='Downloading video info page') 
  65         for idx
, durl 
in enumerate(video_info
['durl']): 
  68                 'filesize': int_or_none(durl
['size']), 
  70             for backup_url 
in durl
.get('backup_url', []): 
  73                     # backup URLs have lower priorities 
  74                     'preference': -2 if 'hd.mp4' in backup_url 
else -3, 
  77             self
._sort
_formats
(formats
) 
  80                 'id': '%s_part%s' % (video_id
, idx
), 
  81                 'duration': float_or_none(durl
.get('length'), 1000), 
  85         title 
= self
._html
_search
_regex
('<h1[^>]+title="([^"]+)">', webpage
, 'title') 
  86         description 
= self
._html
_search
_meta
('description', webpage
) 
  87         timestamp 
= unified_timestamp(self
._html
_search
_regex
( 
  88             r
'<time[^>]+datetime="([^"]+)"', webpage
, 'upload time', fatal
=False)) 
  89         thumbnail 
= self
._html
_search
_meta
(['og:image', 'thumbnailUrl'], webpage
) 
  91         # TODO 'view_count' requires deobfuscating Javascript 
  95             'description': description
, 
  96             'timestamp': timestamp
, 
  97             'thumbnail': thumbnail
, 
  98             'duration': float_or_none(video_info
.get('timelength'), scale
=1000), 
 101         uploader_mobj 
= re
.search( 
 102             r
'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"', 
 106                 'uploader': uploader_mobj
.group('name'), 
 107                 'uploader_id': uploader_mobj
.group('id'), 
 110         for entry 
in entries
: 
 113         if len(entries
) == 1: 
 116             for idx
, entry 
in enumerate(entries
): 
 117                 entry
['id'] = '%s_part%d' % (video_id
, (idx 
+ 1)) 
 120                 '_type': 'multi_video', 
 123                 'description': description
,