X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/46113edab215c2211a604c06245c16d5d4e57dcf..a57dac80d518b579d7a7da90aa74ce401f138d0f:/youtube_dl/extractor/beeg.py diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 314e37f..c15a0ac 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -1,65 +1,109 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + int_or_none, + unified_timestamp, +) class BeegIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P\d+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P\d+)' + _TESTS = [{ + # api/v6 v1 'url': 'http://beeg.com/5416503', - 'md5': '634526ae978711f6b748fe0dd6c11f57', + 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820', 'info_dict': { 'id': '5416503', 'ext': 'mp4', 'title': 'Sultry Striptease', - 'description': 'md5:6db3c6177972822aaba18652ff59c773', - 'categories': list, # NSFW - 'thumbnail': 're:https?://.*\.jpg$', + 'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2', + 'timestamp': 1391813355, + 'upload_date': '20140207', + 'duration': 383, + 'tags': list, 'age_limit': 18, } - } + }, { + # api/v6 v2 + 'url': 'https://beeg.com/1941093077?t=911-1391', + 'only_matching': True, + }, { + 'url': 'https://beeg.porn/video/5416503', + 'only_matching': True, + }, { + 'url': 'https://beeg.porn/5416503', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - quality_arr = self._search_regex( - r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats') + beeg_version = self._search_regex( + r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', + default='1546225636701') + + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query = { + 'v': 2, + 's': t[0], + 'e': t[1], + } + else: + query = {'v': 1} - formats = [{ - 'url': fmt[1], - 'format_id': fmt[0], - 'height': int(fmt[0][:-1]), - } for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)] + for api_path in ('', 'api.'): + video = self._download_json( + 'https://%sbeeg.com/api/v6/%s/video/%s' + % (api_path, beeg_version, video_id), video_id, + fatal=api_path == 'api.', query=query) + if video: + break + formats = [] + for format_id, video_url in video.items(): + if not video_url: + continue + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) + if not height: + continue + formats.append({ + 'url': self._proto_relative_url( + video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'), + 'format_id': format_id, + 'height': int(height), + }) self._sort_formats(formats) - title = self._html_search_regex( - r'([^<]+)\s*-\s*beeg\.?', webpage, 'title') - - description = self._html_search_regex( - r'