1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   8     compat_urllib_parse_urlparse
, 
  18 class PornHubIE(InfoExtractor
): 
  19     _VALID_URL 
= r
'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)' 
  21         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 
  22         'md5': '882f488fa1f0026f023f33576004a2ed', 
  27             "title": "Seductive Indian beauty strips down and fingers her pink pussy", 
  32     def _extract_count(self
, pattern
, webpage
, name
): 
  33         count 
= self
._html
_search
_regex
(pattern
, webpage
, '%s count' % name
, fatal
=False) 
  35             count 
= str_to_int(count
) 
  38     def _real_extract(self
, url
): 
  39         video_id 
= self
._match
_id
(url
) 
  41         req 
= compat_urllib_request
.Request(url
) 
  42         req
.add_header('Cookie', 'age_verified=1') 
  43         webpage 
= self
._download
_webpage
(req
, video_id
) 
  45         video_title 
= self
._html
_search
_regex
(r
'<h1 [^>]+>([^<]+)', webpage
, 'title') 
  46         video_uploader 
= self
._html
_search
_regex
( 
  47             r
'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<', 
  48             webpage
, 'uploader', fatal
=False) 
  49         thumbnail 
= self
._html
_search
_regex
(r
'"image_url":"([^"]+)', webpage
, 'thumbnail', fatal
=False) 
  51             thumbnail 
= compat_urllib_parse
.unquote(thumbnail
) 
  53         view_count 
= self
._extract
_count
(r
'<span class="count">([\d,\.]+)</span> views', webpage
, 'view') 
  54         like_count 
= self
._extract
_count
(r
'<span class="votesUp">([\d,\.]+)</span>', webpage
, 'like') 
  55         dislike_count 
= self
._extract
_count
(r
'<span class="votesDown">([\d,\.]+)</span>', webpage
, 'dislike') 
  56         comment_count 
= self
._extract
_count
( 
  57             r
'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage
, 'comment') 
  59         video_urls 
= list(map(compat_urllib_parse
.unquote 
, re
.findall(r
'"quality_[0-9]{3}p":"([^"]+)', webpage
))) 
  60         if webpage
.find('"encrypted":true') != -1: 
  61             password 
= compat_urllib_parse
.unquote_plus(self
._html
_search
_regex
(r
'"video_title":"([^"]+)', webpage
, 'password')) 
  62             video_urls 
= list(map(lambda s
: aes_decrypt_text(s
, password
, 32).decode('utf-8'), video_urls
)) 
  65         for video_url 
in video_urls
: 
  66             path 
= compat_urllib_parse_urlparse(video_url
).path
 
  67             extension 
= os
.path
.splitext(path
)[1][1:] 
  68             format 
= path
.split('/')[5].split('_')[:2] 
  69             format 
= "-".join(format
) 
  71             m 
= re
.match(r
'^(?P<height>[0-9]+)P-(?P<tbr>[0-9]+)K$', format
) 
  76                 height 
= int(m
.group('height')) 
  77                 tbr 
= int(m
.group('tbr')) 
  87         self
._sort
_formats
(formats
) 
  91             'uploader': video_uploader
, 
  93             'thumbnail': thumbnail
, 
  94             'view_count': view_count
, 
  95             'like_count': like_count
, 
  96             'dislike_count': dislike_count
, 
  97             'comment_count': comment_count
,