X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/feb5020b37d7d3ba4005a8bac6f4efece4ce4b8c..82d01c1a88911e89e3e7ed4518d54c028f9e4792:/youtube_dl/extractor/keek.py diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index 72ad6a3..c0956ba 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -1,41 +1,46 @@ -import re +from __future__ import unicode_literals from .common import InfoExtractor class KeekIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P\w+)' - IE_NAME = u'keek' + _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P\w+)' + IE_NAME = 'keek' _TEST = { - u'url': u'http://www.keek.com/ytdl/keeks/NODfbab', - u'file': u'NODfbab.mp4', - u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83', - u'info_dict': { - u"uploader": u"ytdl", - u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." - } + 'url': 'https://www.keek.com/ytdl/keeks/NODfbab', + 'md5': '09c5c109067536c1cec8bac8c21fea05', + 'info_dict': { + 'id': 'NODfbab', + 'ext': 'mp4', + 'uploader': 'youtube-dl project', + 'uploader_id': 'ytdl', + 'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .', + }, } def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('videoID') + video_id = self._match_id(url) - video_url = u'http://cdn.keek.com/keek/video/%s' % video_id - thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id + video_url = 'http://cdn.keek.com/keek/video/%s' % video_id + thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id webpage = self._download_webpage(url, video_id) - video_title = self._html_search_regex(r'[\S\s]+?

(?P.+?)

', - webpage, u'uploader', fatal=False) - - info = { - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'title': video_title, - 'thumbnail': thumbnail, - 'uploader': uploader + raw_desc = self._html_search_meta('description', webpage) + if raw_desc: + uploader = self._html_search_regex( + r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False) + uploader_id = self._html_search_regex( + r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False) + else: + uploader = None + uploader_id = None + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': self._og_search_title(webpage), + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, } - return [info]