]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/watchindianporn.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import parse_duration
10 class WatchIndianPornIE(InfoExtractor
):
11 IE_DESC
= 'Watch Indian Porn'
12 _VALID_URL
= r
'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
14 'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
15 'md5': '249589a164dde236ec65832bfce17440',
18 'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
20 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
21 'thumbnail': r
're:^https?://.*\.jpg$',
29 def _real_extract(self
, url
):
30 mobj
= re
.match(self
._VALID
_URL
, url
)
31 video_id
= mobj
.group('id')
32 display_id
= mobj
.group('display_id')
34 webpage
= self
._download
_webpage
(url
, display_id
)
36 info_dict
= self
._parse
_html
5_media
_entries
(url
, webpage
, video_id
)[0]
38 title
= self
._html
_search
_regex
((
39 r
'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
43 duration
= parse_duration(self
._search
_regex
(
44 r
'Time:\s*<strong>\s*(.+?)\s*</strong>',
45 webpage
, 'duration', fatal
=False))
47 view_count
= int(self
._search
_regex
(
48 r
'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
49 webpage
, 'view count', fatal
=False))
51 categories
= re
.findall(
52 r
'<a[^>]+class=[\'"]categories[\'"][^
>]*>\s
*([^
<]+)\s
*</a
>',
57 'display_id
': display_id,
63 'view_count
': view_count,
64 'categories
': categories,