1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  12 class XHamsterIE(InfoExtractor
): 
  13     """Information Extractor for xHamster""" 
  14     _VALID_URL 
= r
'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' 
  16         'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', 
  17         'file': '1509445.mp4', 
  18         'md5': '8281348b8d3c53d39fffb377d24eac4e', 
  20             "upload_date": "20121014", 
  21             "uploader_id": "Ruseful2011", 
  22             "title": "FemaleAgent Shy beauty takes the bait", 
  27         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', 
  28         'file': '2221348.flv', 
  29         'md5': 'e767b9475de189320f691f49c679c4c7', 
  31             "upload_date": "20130914", 
  32             "uploader_id": "jojo747400", 
  33             "title": "Britney Spears  Sexy Booty", 
  38     def _real_extract(self
,url
): 
  39         def extract_video_url(webpage
): 
  40             mobj 
= re
.search(r
'\'srv
\': \'(?P
<server
>[^
\']*)\',\s
*\'file\': \'(?P
<file>[^
\']+)\',', webpage) 
  42                 raise ExtractorError('Unable to extract media URL
') 
  43             if len(mobj.group('server
')) == 0: 
  44                 return compat_urllib_parse.unquote(mobj.group('file')) 
  46                 return mobj.group('server
')+'/key
='+mobj.group('file') 
  48         def extract_mp4_video_url(webpage): 
  49             mp4 = re.search(r'<a href
=\"(.+?
)\" class=\"mp4Play
\"',webpage) 
  56             return '<div 
class=\'icon iconHD
\'' in webpage 
  58         mobj = re.match(self._VALID_URL, url) 
  60         video_id = mobj.group('id') 
  61         seo = mobj.group('seo
') 
  62         mrss_url = 'http
://xhamster
.com
/movies
/%s/%s.html
' % (video_id, seo) 
  63         webpage = self._download_webpage(mrss_url, video_id) 
  65         video_title = self._html_search_regex( 
  66             r'<title
>(?P
<title
>.+?
) - xHamster\
.com
</title
>', webpage, 'title
') 
  68         # Only a few videos have a description
  69         mobj = re.search(r'<span
>Description
: </span
>([^
<]+)', webpage) 
  70         video_description = mobj.group(1) if mobj else None 
  72         mobj = re.search(r'hint
=\'(?P
<upload_date_Y
>[0-9]{4}
)-(?P
<upload_date_m
>[0-9]{2}
)-(?P
<upload_date_d
>[0-9]{2}
) [0-9]{2}
:[0-9]{2}
:[0-9]{2} 
[A
-Z
]{3,4}\'', webpage) 
  74             video_upload_date = mobj.group('upload_date_Y
')+mobj.group('upload_date_m
')+mobj.group('upload_date_d
') 
  76             video_upload_date = None 
  77             self._downloader.report_warning('Unable to extract upload date
') 
  79         video_uploader_id = self._html_search_regex( 
  80             r'<a href
=\'/user
/[^
>]+>(?P
<uploader_id
>[^
<]+)', 
  81             webpage, 'uploader 
id', default='anonymous
') 
  83         video_thumbnail = self._search_regex( 
  84             r'\'image
\':\'(?P
<thumbnail
>[^
\']+)\'', 
  85             webpage, 'thumbnail
', fatal=False) 
  87         age_limit = self._rta_search(webpage) 
  90         video_url = extract_video_url(webpage) 
  93             'format_id
': 'hd
' if hd else 'sd
', 
  97         video_mp4_url = extract_mp4_video_url(webpage) 
  98         if video_mp4_url is not None: 
 100                 'url
': video_mp4_url, 
 102                 'format_id
': 'mp4
-hd
' if hd else 'mp4
-sd
', 
 107             webpage = self._download_webpage( 
 108                 mrss_url + '?hd
', video_id, note='Downloading HD webpage
') 
 110                 video_url = extract_video_url(webpage) 
 117         self._sort_formats(formats) 
 121             'title
': video_title, 
 123             'description
': video_description, 
 124             'upload_date
': video_upload_date, 
 125             'uploader_id
': video_uploader_id, 
 126             'thumbnail
': video_thumbnail, 
 127             'age_limit
': age_limit,