3 from .common
import InfoExtractor
11 class XHamsterIE(InfoExtractor
):
12 """Information Extractor for xHamster"""
13 _VALID_URL
= r
'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
15 def _real_extract(self
,url
):
16 mobj
= re
.match(self
._VALID
_URL
, url
)
18 video_id
= mobj
.group('id')
19 mrss_url
= 'http://xhamster.com/movies/%s/.html' % video_id
20 webpage
= self
._download
_webpage
(mrss_url
, video_id
)
22 mobj
= re
.search(r
'\'srv
\': \'(?P
<server
>[^
\']*)\',\s
*\'file\': \'(?P
<file>[^
\']+)\',', webpage)
24 raise ExtractorError(u'Unable to extract media URL
')
25 if len(mobj.group('server
')) == 0:
26 video_url = compat_urllib_parse.unquote(mobj.group('file'))
28 video_url = mobj.group('server
')+'/key
='+mobj.group('file')
29 video_extension = video_url.split('.')[-1]
31 video_title = self._html_search_regex(r'<title
>(?P
<title
>.+?
) - xHamster\
.com
</title
>',
34 # Can't see the description anywhere
in the UI
35 # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
36 # webpage, u'description', fatal=False)
37 # if video_description: video_description = unescapeHTML(video_description)
39 mobj
= re
.search(r
'hint=\'(?P
<upload_date_Y
>[0-9]{4}
)-(?P
<upload_date_m
>[0-9]{2}
)-(?P
<upload_date_d
>[0-9]{2}
) [0-9]{2}
:[0-9]{2}
:[0-9]{2}
[A
-Z
]{3,4}\'', webpage)
41 video_upload_date = mobj.group('upload_date_Y
')+mobj.group('upload_date_m
')+mobj.group('upload_date_d
')
43 video_upload_date = None
44 self._downloader.report_warning(u'Unable to extract upload date
')
46 video_uploader_id = self._html_search_regex(r'<a href
=\'/user
/[^
>]+>(?P
<uploader_id
>[^
<]+)',
47 webpage, u'uploader
id', default=u'anonymous
')
49 video_thumbnail = self._search_regex(r'\'image
\':\'(?P
<thumbnail
>[^
\']+)\'',
50 webpage, u'thumbnail
', fatal=False)
55 'ext
': video_extension,
57 # 'description
': video_description,
58 'upload_date
': video_upload_date,
59 'uploader_id
': video_uploader_id,
60 'thumbnail
': video_thumbnail