4 from .common
import InfoExtractor
6 compat_urllib_parse_urlparse
,
14 class PornHubIE(InfoExtractor
):
15 _VALID_URL
= r
'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))'
17 u
'url': u
'http://www.pornhub.com/view_video.php?viewkey=648719015',
18 u
'file': u
'648719015.mp4',
19 u
'md5': u
'882f488fa1f0026f023f33576004a2ed',
21 u
"uploader": u
"BABES-COM",
22 u
"title": u
"Seductive Indian beauty strips down and fingers her pink pussy",
27 def _real_extract(self
, url
):
28 mobj
= re
.match(self
._VALID
_URL
, url
)
29 video_id
= mobj
.group('videoid')
30 url
= 'http://www.' + mobj
.group('url')
32 req
= compat_urllib_request
.Request(url
)
33 req
.add_header('Cookie', 'age_verified=1')
34 webpage
= self
._download
_webpage
(req
, video_id
)
36 video_title
= self
._html
_search
_regex
(r
'<h1 [^>]+>([^<]+)', webpage
, u
'title')
37 video_uploader
= self
._html
_search
_regex
(r
'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage
, u
'uploader', fatal
=False)
38 thumbnail
= self
._html
_search
_regex
(r
'"image_url":"([^"]+)', webpage
, u
'thumbnail', fatal
=False)
40 thumbnail
= compat_urllib_parse
.unquote(thumbnail
)
42 video_urls
= list(map(compat_urllib_parse
.unquote
, re
.findall(r
'"quality_[0-9]{3}p":"([^"]+)', webpage
)))
43 if webpage
.find('"encrypted":true') != -1:
44 password
= self
._html
_search
_regex
(r
'"video_title":"([^"]+)', webpage
, u
'password').replace('+', ' ')
45 video_urls
= list(map(lambda s
: aes_decrypt_text(s
, password
, 32).decode('utf-8'), video_urls
))
48 for video_url
in video_urls
:
49 path
= compat_urllib_parse_urlparse(video_url
).path
50 extension
= os
.path
.splitext(path
)[1][1:]
51 format
= path
.split('/')[5].split('_')[:2]
52 format
= "-".join(format
)
59 formats
.sort(key
=lambda format
: list(map(lambda s
: s
.zfill(6), format
['format'].split('-'))))
63 'uploader': video_uploader
,
65 'thumbnail': thumbnail
,