1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   8     compat_urllib_parse_urlparse
, 
  18 class PornHubIE(InfoExtractor
): 
  19     _VALID_URL 
= r
'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))' 
  21         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 
  22         'file': '648719015.mp4', 
  23         'md5': '882f488fa1f0026f023f33576004a2ed', 
  25             "uploader": "BABES-COM", 
  26             "title": "Seductive Indian beauty strips down and fingers her pink pussy", 
  31     def _extract_count(self
, pattern
, webpage
, name
): 
  32         count 
= self
._html
_search
_regex
(pattern
, webpage
, '%s count' % name
, fatal
=False) 
  34             count 
= str_to_int(count
) 
  37     def _real_extract(self
, url
): 
  38         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  39         video_id 
= mobj
.group('videoid') 
  40         url 
= 'http://www.' + mobj
.group('url') 
  42         req 
= compat_urllib_request
.Request(url
) 
  43         req
.add_header('Cookie', 'age_verified=1') 
  44         webpage 
= self
._download
_webpage
(req
, video_id
) 
  46         video_title 
= self
._html
_search
_regex
(r
'<h1 [^>]+>([^<]+)', webpage
, 'title') 
  47         video_uploader 
= self
._html
_search
_regex
( 
  48             r
'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', 
  49             webpage
, 'uploader', fatal
=False) 
  50         thumbnail 
= self
._html
_search
_regex
(r
'"image_url":"([^"]+)', webpage
, 'thumbnail', fatal
=False) 
  52             thumbnail 
= compat_urllib_parse
.unquote(thumbnail
) 
  54         view_count 
= self
._extract
_count
(r
'<span class="count">([\d,\.]+)</span> views', webpage
, 'view') 
  55         like_count 
= self
._extract
_count
(r
'<span class="votesUp">([\d,\.]+)</span>', webpage
, 'like') 
  56         dislike_count 
= self
._extract
_count
(r
'<span class="votesDown">([\d,\.]+)</span>', webpage
, 'dislike') 
  57         comment_count 
= self
._extract
_count
( 
  58             r
'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage
, 'comment') 
  60         video_urls 
= list(map(compat_urllib_parse
.unquote 
, re
.findall(r
'"quality_[0-9]{3}p":"([^"]+)', webpage
))) 
  61         if webpage
.find('"encrypted":true') != -1: 
  62             password 
= compat_urllib_parse
.unquote_plus(self
._html
_search
_regex
(r
'"video_title":"([^"]+)', webpage
, 'password')) 
  63             video_urls 
= list(map(lambda s
: aes_decrypt_text(s
, password
, 32).decode('utf-8'), video_urls
)) 
  66         for video_url 
in video_urls
: 
  67             path 
= compat_urllib_parse_urlparse(video_url
).path
 
  68             extension 
= os
.path
.splitext(path
)[1][1:] 
  69             format 
= path
.split('/')[5].split('_')[:2] 
  70             format 
= "-".join(format
) 
  72             m 
= re
.match(r
'^(?P<height>[0-9]+)P-(?P<tbr>[0-9]+)K$', format
) 
  77                 height 
= int(m
.group('height')) 
  78                 tbr 
= int(m
.group('tbr')) 
  88         self
._sort
_formats
(formats
) 
  92             'uploader': video_uploader
, 
  94             'thumbnail': thumbnail
, 
  95             'view_count': view_count
, 
  96             'like_count': like_count
, 
  97             'dislike_count': dislike_count
, 
  98             'comment_count': comment_count
,