Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mofosex.py
b9430b09b749ad8c1dba28e3db794ef435b5aac8
[youtubedl] / youtube_dl / extractor / mofosex.py
1 import os
2 import re
3
4 from .common import InfoExtractor
5 from ..utils import (
6 compat_urllib_parse_urlparse,
7 compat_urllib_request,
8 compat_urllib_parse,
9 )
10
class MofosexIE(InfoExtractor):
    """Information extractor for mofosex.com video pages.

    Matches URLs of the form ``mofosex.com/videos/<numeric id>/<slug>.html``
    (scheme and ``www.`` prefix optional) and returns a single-video info
    dictionary built from the downloaded page.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
    _TEST = {
        u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
        u'file': u'5018.mp4',
        u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
        u'info_dict': {
            u"title": u"Japanese Teen Music Video",
            u"age_limit": 18,
        }
    }

    @staticmethod
    def _format_from_path(path):
        """Derive a format label from the media URL path.

        The label is the first two ``_``-separated tokens of the path
        component at index 5 (counting the empty string before the leading
        ``/`` as index 0), joined with ``-``.

        Returns None when the path has no component at that index, instead
        of raising IndexError.
        """
        segment_index = 5
        segments = path.split('/')
        if len(segments) <= segment_index:
            return None
        return "-".join(segments[segment_index].split('_')[:2])

    def _real_extract(self, url):
        """Download the video page for *url* and return its info dictionary.

        The returned dictionary always contains 'id', 'title', 'url', 'ext'
        and 'age_limit'. 'format' and 'format_id' are included only when a
        label can be derived from the media URL path.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        # Rebuild the page address from the matched part so the request
        # always uses the http://www. form, whatever the user typed.
        url = 'http://www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        # NOTE(review): presumably this cookie skips the site's age
        # confirmation page -- confirm against the live site.
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
        # The media URL is embedded percent-encoded in the page's flashvars.
        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
        path = compat_urllib_parse_urlparse(video_url).path
        # splitext keeps the leading dot; drop it to get the bare extension.
        extension = os.path.splitext(path)[1][1:]

        age_limit = self._rta_search(webpage)

        info = {
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'ext': extension,
            'age_limit': age_limit,
        }

        # The format label is purely descriptive; an unexpected path layout
        # must not abort an otherwise successful extraction.
        format_label = self._format_from_path(path)
        if format_label is not None:
            info['format'] = format_label
            info['format_id'] = format_label

        return info