from __future__ import unicode_literals

import datetime
import re

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    ExtractorError,
    InAdvancePagedList,
    orderedSet,
    str_to_int,
    unified_strdate,
)


class MotherlessIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
    _TESTS = [{
        'url': 'http://motherless.com/AC3FFE1',
        'md5': '310f62e325a9fafe64f68c0bccb6e75f',
        'info_dict': {
            'id': 'AC3FFE1',
            'ext': 'mp4',
            'title': 'Fucked in the ass while playing PS3',
            'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
            'upload_date': '20100913',
            'uploader_id': 'famouslyfuckedup',
            'thumbnail': r're:http://.*\.jpg',
        }
    }, {
        'url': 'http://motherless.com/532291B',
        'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
        'info_dict': {
            'id': '532291B',
            'ext': 'mp4',
            'title': 'Amazing girl playing the omegle game, PERFECT!',
            'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen'],
            'upload_date': '20140622',
            'uploader_id': 'Sulivana7x',
            'thumbnail': r're:http://.*\.jpg',
        }
    }, {
        'url': 'http://motherless.com/g/cosplay/633979F',
        'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
        'info_dict': {
            'id': '633979F',
            'ext': 'mp4',
            'categories': ['superheroine heroine  superher'],
            'upload_date': '20140827',
            'uploader_id': 'shade0230',
            'thumbnail': r're:http://.*\.jpg',
        }
    }, {
        'url': 'http://motherless.com/8B4BBC1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

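        # Removed or nonexistent videos are reported inside the returned page markup.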
        if any(p in webpage for p in (
                '<title>404 - MOTHERLESS.COM<',
                ">The page you're looking for cannot be found.<")):
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        if '>The content you are trying to view is for friends only.' in webpage:
            raise ExtractorError('Video %s is for friends only' % video_id, expected=True)

        title = self._html_search_regex(
            r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
        video_url = (self._html_search_regex(
            (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
             r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
            webpage, 'video URL', default=None, group='url')
            or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
        age_limit = self._rta_search(webpage)
        view_count = str_to_int(self._html_search_regex(
            r'<strong>Views</strong>\s+([^<]+)<',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
            r'<strong>Favorited</strong>\s+([^<]+)<',
            webpage, 'like count', fatal=False))

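        # The upload date is shown either as an absolute date or as a relative "N days Ago" string.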
        upload_date = self._html_search_regex(
            r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
        if 'Ago' in upload_date:
            days = int(re.search(r'([0-9]+)', upload_date).group(1))
            upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
        else:
            upload_date = unified_strdate(upload_date)

        comment_count = webpage.count('class="media-comment-contents"')
        uploader_id = self._html_search_regex(
            r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
            webpage, 'uploader_id')

        categories = self._html_search_meta('keywords', webpage, default=None)
        if categories:
            categories = [cat.strip() for cat in categories.split(',')]

        return {
            'id': video_id,
            'title': title,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
            'categories': categories,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'age_limit': age_limit,
            'url': video_url,
        }


class MotherlessGroupIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
    _TESTS = [{
        'url': 'http://motherless.com/g/movie_scenes',
        'info_dict': {
            'id': 'movie_scenes',
            'title': 'Movie Scenes',
            'description': 'Hot and sexy scenes from "regular" movies... '
                           'Beautiful actresses fully nude... A looot of ',
        },
        'playlist_mincount': 662,
    }, {
        'url': 'http://motherless.com/gv/sex_must_be_funny',
        'info_dict': {
            'id': 'sex_must_be_funny',
            'title': 'Sex must be funny',
            'description': 'Sex can be funny. Wide smiles,laugh, games, fun of ',
        },
        'playlist_mincount': 9,
    }]

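    # Single-video URLs that also match the group pattern are handled by MotherlessIE.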
    @classmethod
    def suitable(cls, url):
        return (False if MotherlessIE.suitable(url)
                else super(MotherlessGroupIE, cls).suitable(url))

    def _extract_entries(self, webpage, base):
        entries = []
        for mobj in re.finditer(
                r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
                webpage):
            video_url = compat_urlparse.urljoin(base, mobj.group('href'))
            if not MotherlessIE.suitable(video_url):
                continue
            video_id = MotherlessIE._match_id(video_url)
            title = mobj.group('title')
            entries.append(self.url_result(
                video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
                video_title=title))
        # Alternative fallback: thumbnails carry the video codename in a data attribute
        if not entries:
            entries = [
                self.url_result(
                    compat_urlparse.urljoin(base, '/' + entry_id),
                    ie=MotherlessIE.ie_key(), video_id=entry_id)
                for entry_id in orderedSet(re.findall(
                    r'data-codename=["\']([A-Z0-9]+)', webpage))]
        return entries

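    # Group listings are paginated; pages are fetched lazily below via InAdvancePagedList.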
    def _real_extract(self, url):
        group_id = self._match_id(url)
        page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
        webpage = self._download_webpage(page_url, group_id)
        title = self._search_regex(
            r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
        description = self._html_search_meta(
            'description', webpage, fatal=False)
        page_count = self._int(self._search_regex(
            r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
            webpage, 'page_count'), 'page_count')
        PAGE_SIZE = 60  # assumed entries-per-page for group listings

        def _get_page(idx):
            webpage = self._download_webpage(
                page_url, group_id, query={'page': idx + 1},
                note='Downloading page %d/%d' % (idx + 1, page_count))
            for entry in self._extract_entries(webpage, url):
                yield entry

        playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': group_id,
            'title': title,
            'description': description,
            'entries': playlist,
        }