]+\bhref=[^>]+>([^<]+)', div)
return {
'id': video_id,
'uploader': video_uploader,
+ 'upload_date': upload_date,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
@@ -208,78 +359,243 @@ class PornHubIE(InfoExtractor):
'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
- 'tags': tags,
- 'categories': categories,
+ 'tags': extract_list('tags'),
+ 'categories': extract_list('categories'),
+ 'subtitles': subtitles,
}
-class PornHubPlaylistBaseIE(InfoExtractor):
- def _extract_entries(self, webpage):
+class PornHubPlaylistBaseIE(PornHubBaseIE):
+ def _extract_entries(self, webpage, host):
+ # Only process container div with main playlist content skipping
+ # drop-down menu that uses similar pattern for videos (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
return [
self.url_result(
- 'http://www.pornhub.com/%s' % video_url,
+ 'http://www.%s/%s' % (host, video_url),
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
- webpage))
+ container))
]
def _real_extract(self, url):
- playlist_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
- entries = self._extract_entries(webpage)
+ entries = self._extract_entries(webpage, host)
playlist = self._parse_json(
self._search_regex(
- r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
- playlist_id)
+ r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
+ 'playlist', default='{}'),
+ playlist_id, fatal=False)
+ title = playlist.get('title') or self._search_regex(
+ r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
return self.playlist_result(
- entries, playlist_id, playlist.get('title'), playlist.get('description'))
+ entries, playlist_id, title, playlist.get('description'))
-class PornHubPlaylistIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+class PornHubUserIE(PornHubPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
_TESTS = [{
- 'url': 'http://www.pornhub.com/playlist/6201671',
- 'info_dict': {
- 'id': '6201671',
- 'title': 'P0p4',
- },
- 'playlist_mincount': 35,
- }]
-
-
-class PornHubUserVideosIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
- _TESTS = [{
- 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+ 'url': 'https://www.pornhub.com/model/zoe_ph',
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
'info_dict': {
- 'id': 'zoe_ph',
+ 'id': 'liz-vicious',
},
- 'playlist_mincount': 171,
+ 'playlist_mincount': 118,
}, {
- 'url': 'http://www.pornhub.com/users/rushandlia/videos',
+ 'url': 'https://www.pornhub.com/users/russianveet69',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/channels/povd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
'only_matching': True,
}]
def _real_extract(self, url):
- user_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ user_id = mobj.group('id')
+ return self.url_result(
+ '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
+ video_id=user_id)
+
+
+class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
+ @staticmethod
+ def _has_more(webpage):
+ return re.search(
+ r'''(?x)
+ ]+\bclass=["\']page_next|
+ ]+\brel=["\']next|
+