1 from __future__
import unicode_literals
3 from .common
import InfoExtractor
6 class SexuIE(InfoExtractor
):
7 _VALID_URL
= r
'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
9 'url': 'http://sexu.com/961791/',
10 'md5': 'ff615aca9691053c94f8f10d96cd7884',
14 'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
15 'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
16 'categories': list, # NSFW
17 'thumbnail': r
're:https?://.*\.jpg$',
22 def _real_extract(self
, url
):
23 video_id
= self
._match
_id
(url
)
24 webpage
= self
._download
_webpage
(url
, video_id
)
26 jwvideo
= self
._parse
_json
(
27 self
._search
_regex
(r
'\.setup\(\s*({.+?})\s*\);', webpage
, 'jwvideo'),
30 sources
= jwvideo
['sources']
33 'url': source
['file'].replace('\\', ''),
34 'format_id': source
.get('label'),
35 'height': self
._search
_regex
(
36 r
'^(\d+)[pP]', source
.get('label', ''), 'height', default
=None),
37 } for source
in sources
if source
.get('file')]
38 self
._sort
_formats
(formats
)
40 title
= self
._html
_search
_regex
(
41 r
'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage
, 'title')
43 description
= self
._html
_search
_meta
(
44 'description', webpage
, 'description')
46 thumbnail
= jwvideo
.get('image')
48 categories_str
= self
._html
_search
_meta
(
49 'keywords', webpage
, 'categories')
51 None if categories_str
is None
52 else categories_str
.split(','))
57 'description': description
,
58 'thumbnail': thumbnail
,
59 'categories': categories
,