X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/415fdb62500dca2e22067a05008dfbf87c75b662..1918d37666ea9185c49ca84e64f58e4ff2a422c9:/youtube_dl/extractor/bilibili.py diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 45067b9..1e57310 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,106 +1,261 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, + compat_urlparse, +) +from ..utils import ( ExtractorError, int_or_none, - unified_strdate, + float_or_none, + parse_iso8601, + smuggle_url, + strip_jsonp, + unified_timestamp, + unsmuggle_url, + urlencode_postdata, ) class BiliBiliIE(InfoExtractor): - _VALID_URL = r'http://www\.bilibili\.tv/video/av(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P\d+)/play#)(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', - 'md5': '2c301e4dab317596e837c3e7633e7d86', + 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { 'id': '1074402', - 'ext': 'flv', + 'ext': 'mp4', 'title': '【金坷垃】金泡沫', - 'duration': 308, + 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', + 'duration': 308.315, + 'timestamp': 1398012660, 'upload_date': '20140420', - 'thumbnail': 're:^https?://.+\.jpg', + 'thumbnail': r're:^https?://.+\.jpg', + 'uploader': '菊子桑', + 'uploader_id': '156160', + }, + }, { + # Tested in BiliBiliBangumiIE + 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062', + 'only_matching': True, + }, { + 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643', + 'md5': '3f721ad1e75030cc06faf73587cfec57', + 'info_dict': { + 'id': '100643', + 'ext': 'mp4', + 'title': 'CHAOS;CHILD', + 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', + }, + 'skip': 'Geo-restricted to China', + }, { + # Title with double quotes + 'url': 'http://www.bilibili.com/video/av8903802/', + 'info_dict': { + 'id': '8903802', + 'ext': 'mp4', + 'title': '阿滴英文|英文歌分享#6 "Closer', + 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文', + 'uploader': '阿滴英文', + 'uploader_id': '65880958', + 'timestamp': 1488382620, + 'upload_date': '20170301', + }, + 'params': { + 'skip_download': True, # Test metadata only }, - } + }] + + _APP_KEY = '84956560bc028eb7' + _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' + + def _report_error(self, result): + if 'message' in result: + raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True) + elif 'code' in result: + raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True) + else: + raise ExtractorError('Can\'t extract Bangumi episode ID') def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - + anime_id = mobj.group('anime_id') webpage = self._download_webpage(url, video_id) - video_code = self._search_regex( - r'(?s)
(.*?)
', webpage, 'video code') - - title = self._html_search_meta( - 'media:title', video_code, 'title', fatal=True) - duration_str = self._html_search_meta( - 'duration', video_code, 'duration') - if duration_str is None: - duration = None + + if 'anime/' not in url: + cid = compat_parse_qs(self._search_regex( + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r']+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + webpage, 'player parameters'))['cid'][0] else: - duration_mobj = re.match( - r'^T(?:(?P[0-9]+)H)?(?P[0-9]+)M(?P[0-9]+)S$', - duration_str) - duration = ( - int_or_none(duration_mobj.group('hours'), default=0) * 3600 + - int(duration_mobj.group('minutes')) * 60 + - int(duration_mobj.group('seconds'))) - upload_date = unified_strdate(self._html_search_meta( - 'uploadDate', video_code, fatal=False)) - thumbnail = self._html_search_meta( - 'thumbnailUrl', video_code, 'thumbnail', fatal=False) - - player_params = compat_parse_qs(self._html_search_regex( - r'