3 from .common
import InfoExtractor
12 class XHamsterIE(InfoExtractor
):
13 """Information Extractor for xHamster"""
14 _VALID_URL
= r
'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
16 u
'url': u
'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
17 u
'file': u
'1509445.flv',
18 u
'md5': u
'9f48e0e8d58e3076bb236ff412ab62fa',
20 u
"upload_date": u
"20121014",
21 u
"uploader_id": u
"Ruseful2011",
22 u
"title": u
"FemaleAgent Shy beauty takes the bait",
27 u
'url': u
'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
28 u
'file': u
'2221348.flv',
29 u
'md5': u
'e767b9475de189320f691f49c679c4c7',
31 u
"upload_date": u
"20130914",
32 u
"uploader_id": u
"jojo747400",
33 u
"title": u
"Britney Spears Sexy Booty",
38 def _real_extract(self
,url
):
39 def extract_video_url(webpage
):
40 mobj
= re
.search(r
'\'srv
\': \'(?P
<server
>[^
\']*)\',\s
*\'file\': \'(?P
<file>[^
\']+)\',', webpage)
42 raise ExtractorError(u'Unable to extract media URL
')
43 if len(mobj.group('server
')) == 0:
44 return compat_urllib_parse.unquote(mobj.group('file'))
46 return mobj.group('server
')+'/key
='+mobj.group('file')
49 return webpage.find('<div
class=\'icon iconHD
\'>') != -1
51 mobj = re.match(self._VALID_URL, url)
53 video_id = mobj.group('id')
54 seo = mobj.group('seo
')
55 mrss_url = 'http
://xhamster
.com
/movies
/%s/%s.html
' % (video_id, seo)
56 webpage = self._download_webpage(mrss_url, video_id)
58 video_title = self._html_search_regex(r'<title
>(?P
<title
>.+?
) - xHamster\
.com
</title
>',
61 # Only a few videos have a description
62 mobj = re.search('<span
>Description
: </span
>(?P
<description
>[^
<]+)', webpage)
64 video_description = unescapeHTML(mobj.group('description
'))
66 video_description = None
68 mobj = re.search(r'hint
=\'(?P
<upload_date_Y
>[0-9]{4}
)-(?P
<upload_date_m
>[0-9]{2}
)-(?P
<upload_date_d
>[0-9]{2}
) [0-9]{2}
:[0-9]{2}
:[0-9]{2}
[A
-Z
]{3,4}\'', webpage)
70 video_upload_date = mobj.group('upload_date_Y
')+mobj.group('upload_date_m
')+mobj.group('upload_date_d
')
72 video_upload_date = None
73 self._downloader.report_warning(u'Unable to extract upload date
')
75 video_uploader_id = self._html_search_regex(r'<a href
=\'/user
/[^
>]+>(?P
<uploader_id
>[^
<]+)',
76 webpage, u'uploader
id', default=u'anonymous
')
78 video_thumbnail = self._search_regex(r'\'image
\':\'(?P
<thumbnail
>[^
\']+)\'',
79 webpage, u'thumbnail
', fatal=False)
81 age_limit = self._rta_search(webpage)
83 video_url = extract_video_url(webpage)
87 'ext
': determine_ext(video_url),
88 'format
': 'hd
' if hd else 'sd
',
89 'format_id
': 'hd
' if hd else 'sd
',
92 webpage = self._download_webpage(mrss_url+'?hd
', video_id)
94 video_url = extract_video_url(webpage)
97 'ext
': determine_ext(video_url),
104 'title
': video_title,
106 'description
': video_description,
107 'upload_date
': video_upload_date,
108 'uploader_id
': video_uploader_id,
109 'thumbnail
': video_thumbnail,
110 'age_limit
': age_limit,