]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/spankbang.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  19 class SpankBangIE(InfoExtractor
): 
  20     _VALID_URL 
= r
'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b' 
  22         'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 
  23         'md5': '1cc433e1d6aa14bc376535b8679302f7', 
  27             'title': 'fantasy solo', 
  28             'description': 'dillion harper masturbates on a bed', 
  29             'thumbnail': r
're:^https?://.*\.jpg$', 
  30             'uploader': 'silly2587', 
  31             'timestamp': 1422571989, 
  32             'upload_date': '20150129', 
  37         'url': 'http://spankbang.com/1vt0/video/solvane+gangbang', 
  38         'only_matching': True, 
  41         'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', 
  42         'only_matching': True, 
  45         'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name', 
  46         'only_matching': True, 
  49         'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k', 
  50         'only_matching': True, 
  52         'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/', 
  53         'only_matching': True, 
  55         'url': 'https://m.spankbang.com/3vvn/play', 
  56         'only_matching': True, 
  58         'url': 'https://spankbang.com/2y3td/embed/', 
  59         'only_matching': True, 
  62     def _real_extract(self
, url
): 
  63         video_id 
= self
._match
_id
(url
) 
  64         webpage 
= self
._download
_webpage
( 
  65             url
.replace('/%s/embed' % video_id
, '/%s/video' % video_id
), 
  66             video_id
, headers
={'Cookie': 'country=US'}) 
  68         if re
.search(r
'<[^>]+\b(?:id|class)=["\']video_removed
', webpage): 
  70                 'Video 
%s is not available
' % video_id, expected=True) 
  74         def extract_format(format_id, format_url): 
  75             f_url = url_or_none(format_url) 
  78             f = parse_resolution(format_id) 
  79             ext = determine_ext(f_url) 
  80             if format_id.startswith('m3u8
') or ext == 'm3u8
': 
  81                 formats.extend(self._extract_m3u8_formats( 
  82                     f_url, video_id, 'mp4
', entry_protocol='m3u8_native
', 
  83                     m3u8_id='hls
', fatal=False)) 
  84             elif format_id.startswith('mpd
') or ext == 'mpd
': 
  85                 formats.extend(self._extract_mpd_formats( 
  86                     f_url, video_id, mpd_id='dash
', fatal=False)) 
  87             elif ext == 'mp4
' or f.get('width
') or f.get('height
'): 
  90                     'format_id
': format_id, 
  94         STREAM_URL_PREFIX = 'stream_url_
' 
  96         for mobj in re.finditer( 
  97                 r'%s(?P
<id>[^\s
=]+)\s
*=\s
*(["\'])(?P<url>(?:(?!\2).)+)\2' 
  98                 % STREAM_URL_PREFIX, webpage): 
  99             extract_format(mobj.group('id', 'url')) 
 102             stream_key = self._search_regex( 
 103                 r'data-streamkey\s*=\s*(["\'])(?P
<value
>(?
:(?
!\
1).)+)\
1', 
 104                 webpage, 'stream key
', group='value
') 
 106             stream = self._download_json( 
 107                 'https
://spankbang
.com
/api
/videos
/stream
', video_id, 
 108                 'Downloading stream JSON
', data=urlencode_postdata({ 
 113                     'X
-Requested
-With
': 'XMLHttpRequest
', 
 116             for format_id, format_url in stream.items(): 
 117                 if format_url and isinstance(format_url, list): 
 118                     format_url = format_url[0] 
 119                 extract_format(format_id, format_url) 
 121         self._sort_formats(formats, field_preference=('preference
', 'height
', 'width
', 'fps
', 'tbr
', 'format_id
')) 
 123         info = self._search_json_ld(webpage, video_id, default={}) 
 125         title = self._html_search_regex( 
 126             r'(?s
)<h1
[^
>]*>(.+?
)</h1
>', webpage, 'title
', default=None) 
 127         description = self._search_regex( 
 128             r'<div
[^
>]+\bclass
=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)', 
 129             webpage, 'description', default=None) 
 130         thumbnail = self._og_search_thumbnail(webpage, default=None) 
 131         uploader = self._html_search_regex( 
 132             (r'(?s)<li[^>]+class=["\']profile
[^
>]+>(.+?
)</a
>', 
 133              r'class="user"[^
>]*><img
[^
>]+>([^
<]+)'), 
 134             webpage, 'uploader
', default=None) 
 135         duration = parse_duration(self._search_regex( 
 136             r'<div
[^
>]+\bclass
=["\']right_side[^>]+>\s*<span>([^<]+)', 
 137             webpage, 'duration', default=None)) 
 138         view_count = str_to_int(self._search_regex( 
 139             r'([\d,.]+)\s+plays', webpage, 'view count', default=None)) 
 141         age_limit = self._rta_search(webpage) 
 145             'title': title or video_id, 
 146             'description': description, 
 147             'thumbnail': thumbnail, 
 148             'uploader': uploader, 
 149             'duration': duration, 
 150             'view_count': view_count, 
 152             'age_limit': age_limit, 
 157 class SpankBangPlaylistIE(InfoExtractor): 
 158     _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+' 
 160         'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties', 
 163             'title': 'Big Ass Titties', 
 165         'playlist_mincount': 50, 
 168     def _real_extract(self, url): 
 169         playlist_id = self._match_id(url) 
 171         webpage = self._download_webpage( 
 172             url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) 
 174         entries = [self.url_result( 
 175             'https://spankbang.com/%s/video' % video_id, 
 176             ie=SpankBangIE.ie_key(), video_id=video_id) 
 177             for video_id in orderedSet(re.findall( 
 178                 r'<a[^>]+\bhref=["\']/?
([\da
-z
]+)/play
/', webpage))] 
 180         title = self._html_search_regex( 
 181             r'<h1
>([^
<]+)\s
+playlist
</h1
>', webpage, 'playlist title
', 
 184         return self.playlist_result(entries, playlist_id, title)