# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bilibili.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  25 class BiliBiliIE(InfoExtractor
): 
  26     _VALID_URL 
= r
'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)' 
  29         'url': 'http://www.bilibili.tv/video/av1074402/', 
  30         'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', 
  35             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', 
  37             'timestamp': 1398012678, 
  38             'upload_date': '20140420', 
  39             'thumbnail': r
're:^https?://.+\.jpg', 
  41             'uploader_id': '156160', 
  44         # Tested in BiliBiliBangumiIE 
  45         'url': 'http://bangumi.bilibili.com/anime/1869/play#40062', 
  46         'only_matching': True, 
  48         'url': 'http://bangumi.bilibili.com/anime/5802/play#100643', 
  49         'md5': '3f721ad1e75030cc06faf73587cfec57', 
  53             'title': 'CHAOS;CHILD', 
  54             'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', 
  56         'skip': 'Geo-restricted to China', 
  58         # Title with double quotes 
  59         'url': 'http://www.bilibili.com/video/av8903802/', 
  62             'title': '阿滴英文|英文歌分享#6 "Closer', 
  63             'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文', 
  67                 'id': '8903802_part1', 
  69                 'title': '阿滴英文|英文歌分享#6 "Closer', 
  70                 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a', 
  72                 'uploader_id': '65880958', 
  73                 'timestamp': 1488382634, 
  74                 'upload_date': '20170301', 
  77                 'skip_download': True,  # Test metadata only 
  81                 'id': '8903802_part2', 
  83                 'title': '阿滴英文|英文歌分享#6 "Closer', 
  84                 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a', 
  86                 'uploader_id': '65880958', 
  87                 'timestamp': 1488382634, 
  88                 'upload_date': '20170301', 
  91                 'skip_download': True,  # Test metadata only 
  96     _APP_KEY 
= '84956560bc028eb7' 
  97     _BILIBILI_KEY 
= '94aba54af9065f71de72f5508f1cd42e' 
  99     def _report_error(self
, result
): 
 100         if 'message' in result
: 
 101             raise ExtractorError('%s said: %s' % (self
.IE_NAME
, result
['message']), expected
=True) 
 102         elif 'code' in result
: 
 103             raise ExtractorError('%s returns error %d' % (self
.IE_NAME
, result
['code']), expected
=True) 
 105             raise ExtractorError('Can\'t extract Bangumi episode ID') 
 107     def _real_extract(self
, url
): 
 108         url
, smuggled_data 
= unsmuggle_url(url
, {}) 
 110         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 111         video_id 
= mobj
.group('id') 
 112         anime_id 
= mobj
.group('anime_id') 
 113         webpage 
= self
._download
_webpage
(url
, video_id
) 
 115         if 'anime/' not in url
: 
 116             cid 
= self
._search
_regex
( 
 117                 r
'\bcid(?:["\']:|
=)(\d
+)', webpage, 'cid
', 
 119             ) or compat_parse_qs(self._search_regex( 
 120                 [r'EmbedPlayer\
([^
)]+,\s
*"([^"]+)"\)', 
 121                  r'EmbedPlayer\([^)]+,\s*\\"([^
"]+)\\"\
)', 
 122                  r'<iframe
[^
>]+src
="https://secure\.bilibili\.com/secure,([^"]+)"'], 
 123                 webpage, 'player parameters'))['cid'][0] 
 125             if 'no_bangumi_tip' not in smuggled_data: 
 126                 self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % ( 
 127                     video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id))) 
 129                 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 
 132             headers.update(self.geo_verification_headers()) 
 134             js = self._download_json( 
 135                 'http://bangumi.bilibili.com/web_api/get_source', video_id, 
 136                 data=urlencode_postdata({'episode_id': video_id}), 
 138             if 'result' not in js: 
 139                 self._report_error(js) 
 140             cid = js['result']['cid'] 
 145         headers.update(self.geo_verification_headers()) 
 149         RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4') 
 150         for num, rendition in enumerate(RENDITIONS, start=1): 
 151             payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition) 
 152             sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() 
 154             video_info = self._download_json( 
 155                 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign), 
 156                 video_id, note='Downloading video info page', 
 157                 headers=headers, fatal=num == len(RENDITIONS)) 
 162             if 'durl' not in video_info: 
 163                 if num < len(RENDITIONS): 
 165                 self._report_error(video_info) 
 167             for idx, durl in enumerate(video_info['durl']): 
 170                     'filesize': int_or_none(durl['size']), 
 172                 for backup_url in durl.get('backup_url', []): 
 175                         # backup URLs have lower priorities 
 176                         'preference': -2 if 'hd.mp4' in backup_url else -3, 
 179                 for a_format in formats: 
 180                     a_format.setdefault('http_headers', {}).update({ 
 184                 self._sort_formats(formats) 
 187                     'id': '%s_part%s' % (video_id, idx), 
 188                     'duration': float_or_none(durl.get('length'), 1000), 
 193         title = self._html_search_regex( 
 194             ('<h1[^>]+\btitle=(["\'])(?P
<title
>(?
:(?
!\
1).)+)\
1', 
 195              '(?s
)<h1
[^
>]*>(?P
<title
>.+?
)</h1
>'), webpage, 'title
', 
 197         description = self._html_search_meta('description
', webpage) 
 198         timestamp = unified_timestamp(self._html_search_regex( 
 199             r'<time
[^
>]+datetime
="([^"]+)"', webpage, 'upload time', 
 200             default=None) or self._html_search_meta( 
 201             'uploadDate', webpage, 'timestamp', default=None)) 
 202         thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) 
 204         # TODO 'view_count' requires deobfuscating Javascript 
 208             'description': description, 
 209             'timestamp': timestamp, 
 210             'thumbnail': thumbnail, 
 211             'duration': float_or_none(video_info.get('timelength'), scale=1000), 
 214         uploader_mobj = re.search( 
 215             r'<a[^>]+href="(?
:https?
:)?
//space\
.bilibili\
.com
/(?P
<id>\d
+)"[^>]*>(?P<name>[^<]+)', 
 219                 'uploader': uploader_mobj.group('name'), 
 220                 'uploader_id': uploader_mobj.group('id'), 
 222         if not info.get('uploader'): 
 223             info['uploader'] = self._html_search_meta( 
 224                 'author', webpage, 'uploader', default=None) 
 226         for entry in entries: 
 229         if len(entries) == 1: 
 232             for idx, entry in enumerate(entries): 
 233                 entry['id'] = '%s_part%d' % (video_id, (idx + 1)) 
 236                 '_type': 'multi_video', 
 239                 'description': description, 
 244 class BiliBiliBangumiIE(InfoExtractor): 
 245     _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)' 
 247     IE_NAME = 'bangumi.bilibili.com' 
 248     IE_DESC = 'BiliBili番剧' 
 251         'url': 'http://bangumi.bilibili.com/anime/1869', 
 255             'description': 'md5:6a9622b911565794c11f25f81d6a97d2', 
 257         'playlist_count': 26, 
 259         'url': 'http://bangumi.bilibili.com/anime/1869', 
 263             'description': 'md5:6a9622b911565794c11f25f81d6a97d2', 
 266             'md5': '91da8621454dd58316851c27c68b0c13', 
 271                 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...', 
 272                 'timestamp': 1414538739, 
 273                 'upload_date': '20141028', 
 274                 'episode': '疾风怒涛 Tempestuous Temperaments', 
 279             'playlist_items': '1', 
 284     def suitable(cls, url): 
 285         return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url) 
 287     def _real_extract(self, url): 
 288         bangumi_id = self._match_id(url) 
 290         # Sometimes this API returns a JSONP response 
 291         season_info = self._download_json( 
 292             'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id, 
 293             bangumi_id, transform_source=strip_jsonp)['result'] 
 296             '_type': 'url_transparent', 
 297             'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}), 
 298             'ie_key': BiliBiliIE.ie_key(), 
 299             'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '), 
 300             'episode': episode.get('index_title'), 
 301             'episode_number': int_or_none(episode.get('index')), 
 302         } for episode in season_info['episodes']] 
 304         entries = sorted(entries, key=lambda entry: entry.get('episode_number')) 
 306         return self.playlist_result( 
 308             season_info.get('bangumi_title'), season_info.get('evaluate'))