X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/415fdb62500dca2e22067a05008dfbf87c75b662..e40743cdcfc2b95e9afd3135ba7f3e024be9f41e:/youtube_dl/extractor/bilibili.py diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 45067b9..6c66a12 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -2,37 +2,56 @@ from __future__ import unicode_literals import re +import itertools +import json from .common import InfoExtractor +from ..compat import ( + compat_etree_fromstring, +) from ..utils import ( - compat_parse_qs, - ExtractorError, int_or_none, unified_strdate, + ExtractorError, ) class BiliBiliIE(InfoExtractor): - _VALID_URL = r'http://www\.bilibili\.tv/video/av(?P[0-9]+)/' + _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P[0-9]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '2c301e4dab317596e837c3e7633e7d86', 'info_dict': { - 'id': '1074402', + 'id': '1074402_part1', 'ext': 'flv', 'title': '【金坷垃】金泡沫', 'duration': 308, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', }, - } + }, { + 'url': 'http://www.bilibili.com/video/av1041170/', + 'info_dict': { + 'id': '1041170', + 'title': '【BD1080P】刀语【诸神&异域】', + }, + 'playlist_count': 9, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + + if '(此视频不存在或被删除)' in webpage: + raise ExtractorError( + 'The video does not exist or was deleted', expected=True) + + if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage: + raise ExtractorError( + 'The video is not available in your region due to copyright reasons', + expected=True) + video_code = self._search_regex( r'(?s)
(.*?)
', webpage, 'video code') @@ -55,19 +74,39 @@ class BiliBiliIE(InfoExtractor): thumbnail = self._html_search_meta( 'thumbnailUrl', video_code, 'thumbnail', fatal=False) - player_params = compat_parse_qs(self._html_search_regex( - r'