1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
14 class RedTubeIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
17 'url': 'http://www.redtube.com/66418',
18 'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
22 'title': 'Sucked on a toilet',
23 'upload_date': '20120831',
29 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
30 'only_matching': True,
34 def _extract_urls(webpage
):
36 r
'<iframe[^>]+?src=["\'](?P
<url
>(?
:https?
:)?
//embed\
.redtube\
.com
/\?.*?
\bid
=\d
+)',
39 def _real_extract(self, url):
40 video_id = self._match_id(url)
41 webpage = self._download_webpage(
42 'http
://www
.redtube
.com
/%s' % video_id, video_id)
44 if any(s in webpage for s in ['video
-deleted
-info
', '>This video has been removed
']):
45 raise ExtractorError('Video
%s has been removed
' % video_id, expected=True)
47 title = self._html_search_regex(
48 (r'<h1
class="videoTitle[^"]*">(?P<title>.+?)</h1>',
49 r'videoTitle\s*:\s*(["\'])(?P
<title
>)\
1'),
50 webpage, 'title
', group='title
')
53 sources = self._parse_json(
55 r'sources\s
*:\s
*({.+?
})', webpage, 'source
', default='{}'),
56 video_id, fatal=False)
57 if sources and isinstance(sources, dict):
58 for format_id, format_url in sources.items():
62 'format_id
': format_id,
63 'height
': int_or_none(format_id),
66 video_url = self._html_search_regex(
67 r'<source src
="(.+?)" type="video/mp4">', webpage, 'video URL
')
68 formats.append({'url
': video_url})
69 self._sort_formats(formats)
71 thumbnail = self._og_search_thumbnail(webpage)
72 upload_date = unified_strdate(self._search_regex(
73 r'<span
[^
>]+class="added-time"[^
>]*>ADDED ([^
<]+)<',
74 webpage, 'upload date
', fatal=False))
75 duration = int_or_none(self._search_regex(
76 r'videoDuration\s
*:\s
*(\d
+)', webpage, 'duration
', fatal=False))
77 view_count = str_to_int(self._search_regex(
78 r'<span
[^
>]*>VIEWS
</span
></td
>\s
*<td
>([\d
,.]+)',
79 webpage, 'view count
', fatal=False))
81 # No self-labeling, but they describe themselves as
82 # "Home of Videos Porno"
89 'thumbnail
': thumbnail,
90 'upload_date
': upload_date,
92 'view_count
': view_count,
93 'age_limit
': age_limit,