Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/xnxx.py
debian/control: Update list of extractors.
[youtubedl] / youtube_dl / extractor / xnxx.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 compat_urllib_parse,
6
7 ExtractorError,
8 )
9
10
class XNXXIE(InfoExtractor):
    """Information extractor for video pages on video.xnxx.com / www.xnxx.com."""

    # Group 1 captures the numeric video id; group 2 the trailing URL slug.
    _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
    # The captured value is percent-decoded in _real_extract before use.
    VIDEO_URL_RE = r'flv_url=(.*?)&'
    # The dot is escaped so that only a literal "XNXX.COM" suffix matches
    # (an unescaped "." would accept any character in that position).
    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX\.COM'
    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
    _TEST = {
        u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
        u'file': u'1135332.flv',
        u'md5': u'0831677e2b4761795f68d417e0b7b445',
        u'info_dict': {
            u"title": u"lida \u00bb Naked Funny Actress (5)",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dictionary.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``uploader``, ``upload_date``, ``title``, ``ext``,
        ``thumbnail``, ``description`` and ``age_limit``.

        Raises ExtractorError if *url* does not match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # The media URL is embedded percent-encoded in the page, so decode it.
        video_url = self._search_regex(
            self.VIDEO_URL_RE, webpage, u'video URL')
        video_url = compat_urllib_parse.unquote(video_url)

        video_title = self._html_search_regex(
            self.VIDEO_TITLE_RE, webpage, u'title')

        # NOTE(review): fatal=False presumably makes a missing thumbnail
        # non-fatal (yielding None) -- confirm against InfoExtractor.
        video_thumbnail = self._search_regex(
            self.VIDEO_THUMB_RE, webpage, u'thumbnail', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
            'age_limit': 18,
        }]