1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  14 class RedTubeIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' 
  17         'url': 'http://www.redtube.com/66418', 
  18         'md5': '7b8c22b5e7098a3e1c09709df1126d2d', 
  22             'title': 'Sucked on a toilet', 
  23             'upload_date': '20120831', 
  29         'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', 
  30         'only_matching': True, 
  34     def _extract_urls(webpage
): 
  36             r
'<iframe[^>]+?src=["\'](?P
<url
>(?
:https?
:)?
//embed\
.redtube\
.com
/\?.*?
\bid
=\d
+)', 
  39     def _real_extract(self, url): 
  40         video_id = self._match_id(url) 
  41         webpage = self._download_webpage( 
  42             'http
://www
.redtube
.com
/%s' % video_id, video_id) 
  44         if any(s in webpage for s in ['video
-deleted
-info
', '>This video has been removed
']): 
  45             raise ExtractorError('Video 
%s has been removed
' % video_id, expected=True) 
  47         title = self._html_search_regex( 
  48             (r'<h1 
class="videoTitle[^"]*">(?P<title>.+?)</h1>', 
  49              r'videoTitle\s*:\s*(["\'])(?P
<title
>)\
1'), 
  50             webpage, 'title
', group='title
') 
  53         sources = self._parse_json( 
  55                 r'sources\s
*:\s
*({.+?
})', webpage, 'source
', default='{}'), 
  56             video_id, fatal=False) 
  57         if sources and isinstance(sources, dict): 
  58             for format_id, format_url in sources.items(): 
  62                         'format_id
': format_id, 
  63                         'height
': int_or_none(format_id), 
  66             video_url = self._html_search_regex( 
  67                 r'<source src
="(.+?)" type="video/mp4">', webpage, 'video URL
') 
  68             formats.append({'url
': video_url}) 
  69         self._sort_formats(formats) 
  71         thumbnail = self._og_search_thumbnail(webpage) 
  72         upload_date = unified_strdate(self._search_regex( 
  73             r'<span
[^
>]+class="added-time"[^
>]*>ADDED ([^
<]+)<', 
  74             webpage, 'upload date
', fatal=False)) 
  75         duration = int_or_none(self._search_regex( 
  76             r'videoDuration\s
*:\s
*(\d
+)', webpage, 'duration
', fatal=False)) 
  77         view_count = str_to_int(self._search_regex( 
  78             r'<span
[^
>]*>VIEWS
</span
></td
>\s
*<td
>([\d
,.]+)', 
  79             webpage, 'view count
', fatal=False)) 
  81         # No self-labeling, but they describe themselves as 
  82         # "Home of Videos Porno" 
  89             'thumbnail
': thumbnail, 
  90             'upload_date
': upload_date, 
  92             'view_count
': view_count, 
  93             'age_limit
': age_limit,