]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/anysex.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
12 class AnySexIE(InfoExtractor
):
13 _VALID_URL
= r
'https?://(?:www\.)?anysex\.com/(?P<id>\d+)'
15 'url': 'http://anysex.com/156592/',
16 'md5': '023e9fbb7f7987f5529a394c34ad3d3d',
20 'title': 'Busty and sexy blondie in her bikini strips for you',
21 'description': 'md5:de9e418178e2931c10b62966474e1383',
22 'categories': ['Erotic'],
28 def _real_extract(self
, url
):
29 mobj
= re
.match(self
._VALID
_URL
, url
)
30 video_id
= mobj
.group('id')
32 webpage
= self
._download
_webpage
(url
, video_id
)
34 video_url
= self
._html
_search
_regex
(r
"video_url\s*:\s*'([^']+)'", webpage
, 'video URL')
36 title
= self
._html
_search
_regex
(r
'<title>(.*?)</title>', webpage
, 'title')
37 description
= self
._html
_search
_regex
(
38 r
'<div class="description"[^>]*>([^<]+)</div>', webpage
, 'description', fatal
=False)
39 thumbnail
= self
._html
_search
_regex
(
40 r
'preview_url\s*:\s*\'(.*?
)\'', webpage, 'thumbnail
', fatal=False)
42 categories = re.findall(
43 r'<a href
="http://anysex\.com/categories/[^"]+" title="[^
"]*">([^
<]+)</a
>', webpage)
45 duration = parse_duration(self._search_regex(
46 r'<b
>Duration
:</b
> (?
:<q itemprop
="duration">)?
(\d
+:\d
+)', webpage, 'duration
', fatal=False))
47 view_count = int_or_none(self._html_search_regex(
48 r'<b
>Views
:</b
> (\d
+)', webpage, 'view count
', fatal=False))
55 'description
': description,
56 'thumbnail
': thumbnail,
57 'categories
': categories,
59 'view_count
': view_count,