X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/46113edab215c2211a604c06245c16d5d4e57dcf..8593410c28e395b68b410169356663541005c3ae:/youtube_dl/extractor/shared.py?ds=inline diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index badba2a..02295d1 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,19 +1,59 @@ from __future__ import unicode_literals -import re -import base64 - from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote_plus, +) from ..utils import ( + determine_ext, ExtractorError, - compat_urllib_request, - compat_urllib_parse, int_or_none, + js_to_json, + KNOWN_EXTENSIONS, + parse_filesize, + rot47, + url_or_none, + urlencode_postdata, ) -class SharedIE(InfoExtractor): - _VALID_URL = r'http://shared\.sx/(?P[\da-z]{10})' +class SharedBaseIE(InfoExtractor): + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage, urlh = self._download_webpage_handle(url, video_id) + + if self._FILE_NOT_FOUND in webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) + + video_url = self._extract_video_url(webpage, video_id, url) + + title = self._extract_title(webpage) + filesize = int_or_none(self._extract_filesize(webpage)) + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'filesize': filesize, + 'title': title, + } + + def _extract_title(self, webpage): + return compat_b64decode(self._html_search_meta( + 'full:title', webpage, 'title')).decode('utf-8') + + def _extract_filesize(self, webpage): + return self._html_search_meta( + 'full:size', webpage, 'file size', fatal=False) + + +class SharedIE(SharedBaseIE): + IE_DESC = 'shared.sx' + _VALID_URL = r'https?://shared\.sx/(?P[\da-z]{10})' + _FILE_NOT_FOUND = '>File does not exist<' _TEST = { 'url': 'http://shared.sx/0060718775', @@ -22,36 +62,77 @@ class SharedIE(InfoExtractor): 'id': '0060718775', 'ext': 'mp4', 'title': 'Bmp4', + 'filesize': 1720110, }, } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage(url, video_id) + def _extract_video_url(self, webpage, video_id, url): + download_form = self._hidden_inputs(webpage) - if re.search(r'>File does not exist<', page) is not None: - raise ExtractorError('Video %s does not exist' % video_id, expected=True) + video_page = self._download_webpage( + url, video_id, 'Downloading video page', + data=urlencode_postdata(download_form), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': url, + }) - download_form = dict(re.findall(r'(?:(?!\1).)+)\1', + video_page, 'video URL', group='url') - request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') + return video_url - video_page = self._download_webpage(request, video_id, 'Downloading video page') - video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL') - title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8') - filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False)) - thumbnail = self._html_search_regex( - r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None) +class VivoIE(SharedBaseIE): + IE_DESC = 'vivo.sx' + _VALID_URL = r'https?://vivo\.sx/(?P[\da-z]{10})' + _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' - return { - 'id': video_id, - 'url': video_url, + _TEST = { + 'url': 'http://vivo.sx/d7ddda0e78', + 'md5': '15b3af41be0b4fe01f4df075c2678b2c', + 'info_dict': { + 'id': 'd7ddda0e78', 'ext': 'mp4', - 'filesize': filesize, - 'title': title, - 'thumbnail': thumbnail, - } \ No newline at end of file + 'title': 'Chicken', + 'filesize': 515659, + }, + } + + def _extract_title(self, webpage): + title = self._html_search_regex( + r'data-name\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'title', default=None, group='title') + if title: + ext = determine_ext(title) + if ext.lower() in KNOWN_EXTENSIONS: + title = title.rpartition('.' + ext)[0] + return title + return self._og_search_title(webpage) + + def _extract_filesize(self, webpage): + return parse_filesize(self._search_regex( + r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)', + webpage, 'filesize', fatal=False)) + + def _extract_video_url(self, webpage, video_id, url): + def decode_url_old(encoded_url): + return compat_b64decode(encoded_url).decode('utf-8') + + stream_url = self._search_regex( + r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'stream url', default=None, group='url') + if stream_url: + stream_url = url_or_none(decode_url_old(stream_url)) + if stream_url: + return stream_url + + def decode_url(encoded_url): + return rot47(compat_urllib_parse_unquote_plus(encoded_url)) + + return decode_url(self._parse_json( + self._search_regex( + r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage, + 'stream'), + video_id, transform_source=js_to_json)['source'])