X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9815bb0a551468e4939cacfffbc2d5cb8dd12431..f46044c66663049e286c20ee015db99d47d9dd8a:/youtube_dl/extractor/keek.py?ds=inline

diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py
index e2093a0..c0956ba 100644
--- a/youtube_dl/extractor/keek.py
+++ b/youtube_dl/extractor/keek.py
@@ -1,32 +1,46 @@
-import re
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
 
 class KeekIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
-    IE_NAME = u'keek'
+    _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)'
+    IE_NAME = 'keek'
+    _TEST = {
+        'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
+        'md5': '09c5c109067536c1cec8bac8c21fea05',
+        'info_dict': {
+            'id': 'NODfbab',
+            'ext': 'mp4',
+            'uploader': 'youtube-dl project',
+            'uploader_id': 'ytdl',
+            'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
+        },
+    }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
+        video_id = self._match_id(url)
 
-        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
-        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
+        video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
+        thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
         webpage = self._download_webpage(url, video_id)
 
-        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
-            webpage, u'title')
-
-        uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
-            webpage, u'uploader', fatal=False)
-
-        info = {
-                'id': video_id,
-                'url': video_url,
-                'ext': 'mp4',
-                'title': video_title,
-                'thumbnail': thumbnail,
-                'uploader': uploader
+        raw_desc = self._html_search_meta('description', webpage)
+        if raw_desc:
+            uploader = self._html_search_regex(
+                r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False)
+            uploader_id = self._html_search_regex(
+                r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False)
+        else:
+            uploader = None
+            uploader_id = None
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': self._og_search_title(webpage),
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
         }
-        return [info]