# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/pornovoisines.py
2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
15 class PornoVoisinesIE(InfoExtractor
):
# Pattern for a video page URL. Captures the numeric video id (``id``) and
# the slug that follows it (``display_id``).
_VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'

# Direct media URL. The first placeholder (%d) takes a stream server number,
# the second (%s) takes the video id.
_VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
    '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'

# Server numbers that may be substituted into the stream host name.
_SERVER_NUMBERS = (1, 2)
24 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
25 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
28 'display_id': 'recherche-appartement',
30 'title': 'Recherche appartement',
31 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
32 'thumbnail': 're:^https?://.*\.jpg$',
33 'upload_date': '20140925',
36 'average_rating': float,
37 'categories': ['Débutantes', 'Scénario', 'Sodomie'],
def build_video_url(cls, num):
    """Return the direct media URL for video ``num``.

    A server number is picked at random from ``cls._SERVER_NUMBERS`` and
    substituted, together with ``num``, into ``cls._VIDEO_URL_TEMPLATE``.

    NOTE(review): the first parameter is named ``cls``, so this is
    presumably decorated with ``@classmethod`` on a line not visible
    here -- confirm.
    """
    server_number = random.choice(cls._SERVER_NUMBERS)
    return cls._VIDEO_URL_TEMPLATE % (server_number, num)
46 def _real_extract(self
, url
):
47 mobj
= re
.match(self
._VALID
_URL
, url
)
48 video_id
= mobj
.group('id')
49 display_id
= mobj
.group('display_id')
51 webpage
= self
._download
_webpage
(url
, video_id
)
53 video_url
= self
.build_video_url(video_id
)
55 title
= self
._html
_search
_regex
(
56 r
'<h1>(.+?)</h1>', webpage
, 'title', flags
=re
.DOTALL
)
57 description
= self
._html
_search
_regex
(
58 r
'<article id="descriptif">(.+?)</article>',
59 webpage
, "description", fatal
=False, flags
=re
.DOTALL
)
61 thumbnail
= self
._search
_regex
(
62 r
'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id
,
63 webpage
, 'thumbnail', fatal
=False)
65 thumbnail
= 'http://www.pornovoisines.com/%s' % thumbnail
67 upload_date
= unified_strdate(self
._search
_regex
(
68 r
'Publié le ([\d-]+)', webpage
, 'upload date', fatal
=False))
69 duration
= int_or_none(self
._search
_regex
(
70 'Durée (\d+)', webpage
, 'duration', fatal
=False))
71 view_count
= int_or_none(self
._search
_regex
(
72 r
'(\d+) vues', webpage
, 'view count', fatal
=False))
73 average_rating
= self
._search
_regex
(
74 r
'Note\s*:\s*(\d+(?:,\d+)?)', webpage
, 'average rating', fatal
=False)
76 average_rating
= float_or_none(average_rating
.replace(',', '.'))
78 categories
= self
._html
_search
_meta
(
79 'keywords', webpage
, 'categories', fatal
=False)
81 categories
= [category
.strip() for category
in categories
.split(',')]
85 'display_id': display_id
,
88 'description': description
,
89 'thumbnail': thumbnail
,
90 'upload_date': upload_date
,
92 'view_count': view_count
,
93 'average_rating': average_rating
,
94 'categories': categories
,