]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/watchindianporn.py
5d3b5bdb4cb904acabea0864dd76be8a0cc62c30
2 from __future__ import unicode_literals
6 from .common import InfoExtractor
14 class WatchIndianPornIE(InfoExtractor):
15     IE_DESC = 'Watch Indian Porn'
16     _VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
18 'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
19 'md5': '249589a164dde236ec65832bfce17440',
22 'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
24 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
25 'thumbnail': 're:^https?://.*\.jpg$',
26 'uploader': 'LoveJay',
27 'upload_date': '20160428',
36     def _real_extract(self, url):
37         mobj = re.match(self._VALID_URL, url)
38         video_id = mobj.group('id')
39         display_id = mobj.group('display_id')
41         webpage = self._download_webpage(url, display_id)
43         video_url = self._html_search_regex(
44             r"url: escape\('([^']+)'\)", webpage, 'url')
46         title = self._html_search_regex(
47             r'<h2 class="he2"><span>(.*?)</span>',
49         thumbnail = self._html_search_regex(
50             r'<span id="container"><img\s+src="([^"]+)"',
51             webpage, 'thumbnail', fatal=False)
53         uploader = self._html_search_regex(
54             r'class="aupa">\s*(.*?)</a>',
56         upload_date = unified_strdate(self._html_search_regex(
57             r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
59         duration = parse_duration(self._search_regex(
60             r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
61             webpage, 'duration', fatal=False))
63         view_count = int_or_none(self._search_regex(
64             r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
65             webpage, 'view count', fatal=False))
66         comment_count = int_or_none(self._search_regex(
67             r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
68             webpage, 'comment count', fatal=False))
70         categories = re.findall(
71             r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
76             'display_id': display_id,
82             'thumbnail': thumbnail,
84             'upload_date': upload_date,
86             'view_count': view_count,
87             'comment_count': comment_count,
88             'categories': categories,