1 from __future__
import unicode_literals
6 from .common
import InfoExtractor
9 compat_urllib_parse_urlparse
,
10 compat_urllib_request
,
# Extractor for single pornhub.com video pages of the form
# view_video.php?viewkey=<key>.
21 class PornHubIE(InfoExtractor
):
# URL pattern for this extractor: http or https, optional "www." prefix, and
# a viewkey made of the characters 0-9 and a-f, captured as the named
# group `id`.
22 _VALID_URL
= r
'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
# NOTE(review): the three key/value entries below look like pieces of a
# test-case mapping (sample URL, md5 value, expected title); the lines that
# open and close that mapping are not visible in this view - confirm against
# the full file.
24 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
25 'md5': '882f488fa1f0026f023f33576004a2ed',
30 "title": "Seductive Indian beauty strips down and fingers her pink pussy",
def _extract_count(self, pattern, webpage, name):
    """Look up a counter in the page text and convert it with ``str_to_int``.

    ``pattern`` is searched in ``webpage`` through ``self._search_regex``
    using the label ``'<name> count'``. The search is issued with
    ``fatal=False``, and whatever it yields is passed on to ``str_to_int``,
    whose result is returned unchanged.
    """
    label = '%s count' % name
    raw_count = self._search_regex(pattern, webpage, label, fatal=False)
    return str_to_int(raw_count)
# Scrape one video page: download it with an age-verification cookie, look
# for a site error message, then pull the title, uploader, thumbnail,
# view/like/dislike/comment counters and the per-quality media URLs out of
# the HTML.
# NOTE(review): several statements of this method are not visible in this
# view (the numbers fused onto the statements skip values), so the comments
# below describe only what is shown.
39 def _real_extract(self
, url
):
# The video identifier comes from the URL via self._match_id.
40 video_id
= self
._match
_id
(url
)
# Build an explicit request object so that a cookie header can be attached.
42 req
= compat_urllib_request
.Request(url
)
# Send the cookie age_verified=1 with the request.
43 req
.add_header('Cookie', 'age_verified=1')
44 webpage
= self
._download
_webpage
(req
, video_id
)
# Look for the content of a <div> whose class starts with
# "userMessageSection"; None is passed as the default value.
# NOTE(review): presumably the helper returns None when the div is absent -
# confirm against _html_search_regex.
46 error_msg
= self
._html
_search
_regex
(
47 r
'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
48 webpage
, 'error message', default
=None)
# Collapse every run of whitespace in the message into a single space.
# NOTE(review): re.sub fails on None; the statement expected between the
# previous one and this one (a guard on error_msg?) is not visible - confirm
# that it exists.
50 error_msg
= re
.sub(r
'\s+', ' ', error_msg
)
# Arguments of a call that reports the site's message, flagged expected=True
# and tagged with the video id.
# NOTE(review): the line that opens this call is not visible in this view.
52 'PornHub said: %s' % error_msg
,
53 expected
=True, video_id
=video_id
)
# Title: the text that follows the first <h1 ...> opening tag.
55 video_title
= self
._html
_search
_regex
(r
'<h1 [^>]+>([^<]+)', webpage
, 'title')
# Uploader: text of the first users link, channels link or "username" span
# found after "From: "; searched with fatal=False.
56 video_uploader
= self
._html
_search
_regex
(
57 r
'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
58 webpage
, 'uploader', fatal
=False)
# Thumbnail: the value stored under "image_url" in the page; searched with
# fatal=False.
59 thumbnail
= self
._html
_search
_regex
(r
'"image_url":"([^"]+)', webpage
, 'thumbnail', fatal
=False)
# Undo URL percent-encoding of the thumbnail address.
# NOTE(review): any guard for a missing thumbnail is not visible here -
# confirm that unquote is never handed a None value.
61 thumbnail
= compat_urllib_parse
.unquote(thumbnail
)
# Counters: each pattern captures digits, commas and dots, which
# _extract_count converts.
63 view_count
= self
._extract
_count
(
64 r
'<span class="count">([\d,\.]+)</span> views', webpage
, 'view')
65 like_count
= self
._extract
_count
(
66 r
'<span class="votesUp">([\d,\.]+)</span>', webpage
, 'like')
67 dislike_count
= self
._extract
_count
(
68 r
'<span class="votesDown">([\d,\.]+)</span>', webpage
, 'dislike')
69 comment_count
= self
._extract
_count
(
70 r
'All Comments\s*<span>\(([\d,.]+)\)', webpage
, 'comment')
# Collect every value stored under a "quality_<three digits>p" key and undo
# its URL percent-encoding.
72 video_urls
= list(map(compat_urllib_parse
.unquote
, re
.findall(r
'"quality_[0-9]{3}p":"([^"]+)', webpage
)))
# When the page contains the marker "encrypted":true, the collected URLs are
# passed through aes_decrypt_text before use.
73 if webpage
.find('"encrypted":true') != -1:
# The password is the page's "video_title" value after unquote_plus
# decoding.
74 password
= compat_urllib_parse
.unquote_plus(
75 self
._search
_regex
(r
'"video_title":"([^"]+)', webpage
, 'password'))
# Each URL goes through aes_decrypt_text with the password and the argument
# 32, and the result is decoded as UTF-8.
76 video_urls
= list(map(lambda s
: aes_decrypt_text(s
, password
, 32).decode('utf-8'), video_urls
))
# Derive per-URL details from the path component of each media URL.
79 for video_url
in video_urls
:
80 path
= compat_urllib_parse_urlparse(video_url
).path
# File extension of the path without its leading dot.
81 extension
= os
.path
.splitext(path
)[1][1:]
# Take element 5 of the "/"-split path and keep the first two
# "_"-separated pieces of it.
# NOTE(review): the hard-coded index 5 raises IndexError on shorter paths;
# the local name `format` also shadows the builtin of the same name.
82 format
= path
.split('/')[5].split('_')[:2]
83 format
= "-".join(format
)
# Match a joined label of the shape "<digits>P-<digits>K"; the digit runs are
# the named groups `height` and `tbr`.
85 m
= re
.match(r
'^(?P<height>[0-9]+)P-(?P<tbr>[0-9]+)K$', format
)
# NOTE(review): handling of a failed match (m is None) is not visible in
# this view - confirm that these two conversions are guarded.
90 height
= int(m
.group('height'))
91 tbr
= int(m
.group('tbr'))
# NOTE(review): the statements that build `formats` are not visible in this
# view.
101 self
._sort
_formats
(formats
)
# Entries of the result mapping; the lines that open and close the mapping
# are not visible in this view.
105 'uploader': video_uploader
,
106 'title': video_title
,
107 'thumbnail': thumbnail
,
108 'view_count': view_count
,
109 'like_count': like_count
,
110 'dislike_count': dislike_count
,
111 'comment_count': comment_count
,
# Extractor for pornhub.com playlist pages of the form /playlist/<digits>.
117 class PornHubPlaylistIE(InfoExtractor
):
# URL pattern for this extractor: http or https, optional "www." prefix, and
# an all-digit playlist number captured as the named group `id`.
118 _VALID_URL
= r
'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
# NOTE(review): the two entries below look like pieces of a test-case
# mapping (sample URL and a 'playlist_mincount' value of 35); the lines
# that open and close that mapping are not visible in this view - confirm
# against the full file.
120 'url': 'http://www.pornhub.com/playlist/6201671',
125 'playlist_mincount': 35,
# Download a playlist page, turn every video link found on it into a
# url_result handled by the 'PornHub' extractor, and return the collection
# through self.playlist_result together with the title and description
# read from the parsed `playlist` object.
# NOTE(review): some statements of this method are not visible in this view
# (the numbers fused onto the statements skip values).
128 def _real_extract(self
, url
):
# The playlist identifier comes from the URL via self._match_id.
129 playlist_id
= self
._match
_id
(url
)
131 webpage
= self
._download
_webpage
(url
, playlist_id
)
# One url_result per link, pointing at http://www.pornhub.com/<link> and
# naming 'PornHub' as the extractor.
# NOTE(review): this expression and the `for` clause after it read like the
# body of a comprehension assigned to `entries`; the opening and closing
# lines are not visible - confirm.
134 self
.url_result('http://www.pornhub.com/%s' % video_url
, 'PornHub')
# The links are de-duplicated with set(), so their order is not preserved.
# NOTE(review): the pattern is a plain (non-raw) string that contains regex
# backslash escapes, and it accepts only digits after "viewkey=", whereas
# PornHubIE._VALID_URL accepts [0-9a-f]+ - confirm whether links with
# hexadecimal viewkeys are meant to be skipped.
135 for video_url
in set(re
.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage
))
# Parse the object literal assigned to "playlistObject" in the page.
# NOTE(review): the call that wraps the pattern arguments below, and the
# remaining argument(s) of _parse_json, are not visible in this view.
138 playlist
= self
._parse
_json
(
140 r
'playlistObject\s*=\s*({.+?});', webpage
, 'playlist'),
# Title and description are read with .get, so a missing key yields None.
143 return self
.playlist_result(
144 entries
, playlist_id
, playlist
.get('title'), playlist
.get('description'))