X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/233624c1db781ee7dabbaf88453cf18e248dd20d..20205e79eb4216762c923790b27dbe38f945293a:/youtube_dl/extractor/youporn.py diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index ea0bce7..e7fca22 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, - sanitized_Request, str_to_int, unescapeHTML, unified_strdate, @@ -15,7 +14,7 @@ from ..aes import aes_decrypt_text class YouPornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P\d+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P\d+)(?:/(?P[^/?#&]+))?' _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', 'md5': '3744d24c50438cf5b6f6d59feb5055c2', @@ -57,22 +56,32 @@ class YouPornIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/', + 'only_matching': True, + }, { + 'url': 'http://www.youporn.com/watch/505835', + 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)', + webpage) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or video_id - request = sanitized_Request(url) - request.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(request, display_id) + webpage = self._download_webpage( + 'http://www.youporn.com/watch/%s' % video_id, display_id, + headers={'Cookie': 'age_verified=1'}) - title = self._search_regex( - [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P(?:(?!\1).)+)\1', - r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'], - webpage, 'title', group='title', - default=None) or self._og_search_title( + title = self._html_search_regex( + r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', + webpage, 'title', default=None) or self._og_search_title( webpage, default=None) or self._html_search_meta( 'title', webpage, fatal=True) @@ -134,7 +143,11 @@ class YouPornIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - description = self._og_search_description(webpage, default=None) + description = self._html_search_regex( + r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>', + webpage, 'description', + default=None) or self._og_search_description( + webpage, default=None) thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail')