X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/46113edab215c2211a604c06245c16d5d4e57dcf..97a8fc3ae80fb363c69c2e6b8c29b5373ac72aea:/youtube_dl/extractor/motherless.py diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 3621ff9..6fe3b60 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -5,62 +5,73 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, str_to_int, unified_strdate, ) class MotherlessIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)' - _TESTS = [ - { - 'url': 'http://motherless.com/AC3FFE1', - 'md5': '310f62e325a9fafe64f68c0bccb6e75f', - 'info_dict': { - 'id': 'AC3FFE1', - 'ext': 'mp4', - 'title': 'Fucked in the ass while playing PS3', - 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'], - 'upload_date': '20100913', - 'uploader_id': 'famouslyfuckedup', - 'thumbnail': 're:http://.*\.jpg', - 'age_limit': 18, - } - }, - { - 'url': 'http://motherless.com/532291B', - 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131', - 'info_dict': { - 'id': '532291B', - 'ext': 'mp4', - 'title': 'Amazing girl playing the omegle game, PERFECT!', - 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'], - 'upload_date': '20140622', - 'uploader_id': 'Sulivana7x', - 'thumbnail': 're:http://.*\.jpg', - 'age_limit': 18, - } + _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)' + _TESTS = [{ + 'url': 'http://motherless.com/AC3FFE1', + 'md5': '310f62e325a9fafe64f68c0bccb6e75f', + 'info_dict': { + 'id': 'AC3FFE1', + 'ext': 'mp4', + 'title': 'Fucked in the ass while playing PS3', + 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'], + 'upload_date': '20100913', + 'uploader_id': 'famouslyfuckedup', + 'thumbnail': r're:http://.*\.jpg', + 'age_limit': 18, + } + }, { + 'url': 'http://motherless.com/532291B', + 'md5': 
'bc59a6b47d1f958e61fbd38a4d31b131', + 'info_dict': { + 'id': '532291B', + 'ext': 'mp4', + 'title': 'Amazing girl playing the omegle game, PERFECT!', + 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', + 'game', 'hairy'], + 'upload_date': '20140622', + 'uploader_id': 'Sulivana7x', + 'thumbnail': r're:http://.*\.jpg', + 'age_limit': 18, }, - { - 'url': 'http://motherless.com/g/cosplay/633979F', - 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0', - 'info_dict': { - 'id': '633979F', - 'ext': 'mp4', - 'title': 'Turtlette', - 'categories': ['superheroine heroine superher'], - 'upload_date': '20140827', - 'uploader_id': 'shade0230', - 'thumbnail': 're:http://.*\.jpg', - 'age_limit': 18, - } + 'skip': '404', + }, { + 'url': 'http://motherless.com/g/cosplay/633979F', + 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0', + 'info_dict': { + 'id': '633979F', + 'ext': 'mp4', + 'title': 'Turtlette', + 'categories': ['superheroine heroine superher'], + 'upload_date': '20140827', + 'uploader_id': 'shade0230', + 'thumbnail': r're:http://.*\.jpg', + 'age_limit': 18, } - ] + }, { + # no keywords + 'url': 'http://motherless.com/8B4BBC1', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if any(p in webpage for p in ( + '<title>404 - MOTHERLESS.COM<', + ">The page you're looking for cannot be found.<")): + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + if '>The content you are trying to view is for friends only.' 
in webpage: + raise ExtractorError('Video %s is for friends only' % video_id, expected=True) + title = self._html_search_regex( r'id="view-upload-title">\s+([^<]+)<', webpage, 'title') video_url = self._html_search_regex( @@ -72,7 +83,7 @@ class MotherlessIE(InfoExtractor): like_count = str_to_int(self._html_search_regex( r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like count', fatal=False)) - + upload_date = self._html_search_regex( r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date') if 'Ago' in upload_date: @@ -86,7 +97,7 @@ class MotherlessIE(InfoExtractor): r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id') - categories = self._html_search_meta('keywords', webpage) + categories = self._html_search_meta('keywords', webpage, default=None) if categories: categories = [cat.strip() for cat in categories.split(',')]