Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/xnxx.py
Merge tag 'upstream/2013.08.08'
[youtubedl] / youtube_dl / extractor / xnxx.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 compat_urllib_parse,
6
7 ExtractorError,
8 )
9
10
class XNXXIE(InfoExtractor):
    """Information extractor for video pages on video.xnxx.com.

    The video id comes from the page URL; the media URL, title and
    thumbnail are scraped from the downloaded page using the class-level
    regular expressions below.
    """

    # Group 1 captures the numeric video id, group 2 the trailing slug.
    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
    # Captures the value of the ``flv_url=`` parameter found in the page
    # source; the value is URL-unquoted before use.
    VIDEO_URL_RE = r'flv_url=(.*?)&'
    # Captures the page title without the trailing " - XNXX.COM" suffix.
    # The dot is escaped so that it only matches a literal "." rather than
    # any character.
    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX\.COM'
    # Captures the value of the ``url_bigthumb=`` parameter, terminated by
    # an HTML-escaped ampersand.
    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
    _TEST = {
        u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
        u'file': u'1135332.flv',
        u'md5': u'0831677e2b4761795f68d417e0b7b445',
        u'info_dict': {
            u"title": u"lida \u00bb Naked Funny Actress (5)"
        }
    }

    def _real_extract(self, url):
        """Extract the video information for *url*.

        Returns a one-element list holding the info dictionary (id, url,
        title, ext, thumbnail; uploader, upload_date and description are
        always None).

        Raises ExtractorError if *url* does not match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # The media URL is stored URL-quoted in the page, so unquote it.
        video_url = self._search_regex(
            self.VIDEO_URL_RE, webpage, u'video URL')
        video_url = compat_urllib_parse.unquote(video_url)

        video_title = self._html_search_regex(
            self.VIDEO_TITLE_RE, webpage, u'title')

        # NOTE(review): fatal=False presumably makes a missing thumbnail
        # non-fatal -- confirm against InfoExtractor._search_regex.
        video_thumbnail = self._search_regex(
            self.VIDEO_THUMB_RE, webpage, u'thumbnail', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
            'description': None,
        }]