1 from __future__
import unicode_literals
6 from .common
import InfoExtractor
8 compat_urllib_parse_urlparse
,
# Extractor class for pornhub.com video pages; it subclasses InfoExtractor.
#
# _VALID_URL accepts an optional "http://" / "https://" scheme and an optional
# "www." prefix, and exposes two named groups:
#   * url     - "pornhub.com/view_video.php?viewkey=<key>" (no scheme, no "www.")
#   * videoid - the viewkey value, restricted to the characters 0-9 and a-f
#
# NOTE(review): this text is a token-per-line dump; the leading numbers appear
# to be original source line numbers, and gaps between them suggest some
# statements are not present here - confirm against the real file before
# changing any code.
18 class PornHubIE(InfoExtractor
):
19 _VALID_URL
= r
'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
21 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
22 'file': '648719015.mp4',
23 'md5': '882f488fa1f0026f023f33576004a2ed',
25 "uploader": "BABES-COM",
26 "title": "Seductive Indian beauty strips down and fingers her pink pussy",
# Helper that pulls a numeric counter out of the page HTML.
#
# Parameters:
#   pattern - regular expression passed to self._html_search_regex
#   webpage - the text that is searched
#   name    - short label; it is interpolated into '%s count' and passed as
#             the third argument of the search
#
# NOTE(review): the statements numbered 33 and 35 are not present in this
# dump, so whether the str_to_int call below is guarded, and what the method
# returns, cannot be determined from here - confirm against the real file.
31 def _extract_count(self
, pattern
, webpage
, name
):
# Search with fatal=False.
# presumably a failed match then yields None instead of raising - verify
# against the InfoExtractor helper.
32 count
= self
._html
_search
_regex
(pattern
, webpage
, '%s count' % name
, fatal
=False)
# Pass the matched text through str_to_int (defined outside this view).
34 count
= str_to_int(count
)
37 def _real_extract(self
, url
):
38 mobj
= re
.match(self
._VALID
_URL
, url
)
39 video_id
= mobj
.group('videoid')
40 url
= 'http://www.' + mobj
.group('url')
42 req
= compat_urllib_request
.Request(url
)
43 req
.add_header('Cookie', 'age_verified=1')
44 webpage
= self
._download
_webpage
(req
, video_id
)
46 video_title
= self
._html
_search
_regex
(r
'<h1 [^>]+>([^<]+)', webpage
, 'title')
47 video_uploader
= self
._html
_search
_regex
(
48 r
'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
49 webpage
, 'uploader', fatal
=False)
50 thumbnail
= self
._html
_search
_regex
(r
'"image_url":"([^"]+)', webpage
, 'thumbnail', fatal
=False)
52 thumbnail
= compat_urllib_parse
.unquote(thumbnail
)
54 view_count
= self
._extract
_count
(r
'<span class="count">([\d,\.]+)</span> views', webpage
, 'view')
55 like_count
= self
._extract
_count
(r
'<span class="votesUp">([\d,\.]+)</span>', webpage
, 'like')
56 dislike_count
= self
._extract
_count
(r
'<span class="votesDown">([\d,\.]+)</span>', webpage
, 'dislike')
57 comment_count
= self
._extract
_count
(
58 r
'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage
, 'comment')
60 video_urls
= list(map(compat_urllib_parse
.unquote
, re
.findall(r
'"quality_[0-9]{3}p":"([^"]+)', webpage
)))
61 if webpage
.find('"encrypted":true') != -1:
62 password
= compat_urllib_parse
.unquote_plus(self
._html
_search
_regex
(r
'"video_title":"([^"]+)', webpage
, 'password'))
63 video_urls
= list(map(lambda s
: aes_decrypt_text(s
, password
, 32).decode('utf-8'), video_urls
))
66 for video_url
in video_urls
:
67 path
= compat_urllib_parse_urlparse(video_url
).path
68 extension
= os
.path
.splitext(path
)[1][1:]
69 format
= path
.split('/')[5].split('_')[:2]
70 format
= "-".join(format
)
72 m
= re
.match(r
'^(?P<height>[0-9]+)P-(?P<tbr>[0-9]+)K$', format
)
77 height
= int(m
.group('height'))
78 tbr
= int(m
.group('tbr'))
88 self
._sort
_formats
(formats
)
92 'uploader': video_uploader
,
94 'thumbnail': thumbnail
,
95 'view_count': view_count
,
96 'like_count': like_count
,
97 'dislike_count': dislike_count
,
98 'comment_count': comment_count
,