X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9f2b33881274af98a9145c533a1d295fad71521a..d1d8237037fb87a57bd1d073c3b9e642d4a75016:/youtube_dl/extractor/pornhub.py diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 5a55c25..6d57e1d 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -1,10 +1,13 @@ +# coding: utf-8 from __future__ import unicode_literals +import itertools import os import re from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlparse, @@ -12,6 +15,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + orderedSet, sanitized_Request, str_to_int, ) @@ -36,7 +40,25 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, - } + }, + }, { + # non-ASCII title + 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', + 'info_dict': { + 'id': '1331683002', + 'ext': 'mp4', + 'title': 'éåºå©·å©·å¥³ç足交', + 'uploader': 'cj397186295', + 'duration': 1753, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, @@ -73,19 +95,25 @@ class PornHubIE(InfoExtractor): 'PornHub said: %s' % error_msg, expected=True, video_id=video_id) + # video_title from flashvars contains whitespace instead of non-ASCII (see + # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying + # on that anymore. + title = self._html_search_meta( + 'twitter:title', webpage, default=None) or self._search_regex( + (r'