]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/pornhd.py
1 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import int_or_none
10 class PornHdIE(InfoExtractor
):
11 _VALID_URL
= r
'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)'
13 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
14 'md5': '956b8ca569f7f4d8ec563e2c41598441',
18 'title': 'Sierra loves doing laundry',
19 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
24 def _real_extract(self
, url
):
25 mobj
= re
.match(self
._VALID
_URL
, url
)
26 video_id
= mobj
.group('id')
28 webpage
= self
._download
_webpage
(url
, video_id
)
30 title
= self
._og
_search
_title
(webpage
)
31 TITLE_SUFFIX
= ' porn HD Video | PornHD.com '
32 if title
.endswith(TITLE_SUFFIX
):
33 title
= title
[:-len(TITLE_SUFFIX
)]
35 description
= self
._html
_search
_regex
(
36 r
'<div class="description">([^<]+)</div>', webpage
, 'description', fatal
=False)
37 view_count
= int_or_none(self
._html
_search
_regex
(
38 r
'(\d+) views </span>', webpage
, 'view count', fatal
=False))
43 'ext': format
.lower(),
44 'format_id': '%s-%s' % (format
.lower(), quality
.lower()),
45 'quality': 1 if quality
.lower() == 'high' else 0,
46 } for format
, quality
, format_url
in re
.findall(
47 r
'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http
://.+?
)\?noProxy
=1\'', webpage)
50 mobj = re.search(r'flashVars
= (?P
<flashvars
>{.+?
});', webpage)
52 flashvars = json.loads(mobj.group('flashvars
'))
55 'url
': flashvars['hashlink
'].replace('?noProxy
=1', ''),
57 'format_id
': 'flv
-low
',
61 'url
': flashvars['hd
'].replace('?noProxy
=1', ''),
63 'format_id
': 'flv
-high
',
67 thumbnail = flashvars['urlWallpaper
']
69 thumbnail = self._og_search_thumbnail(webpage)
71 self._sort_formats(formats)
76 'description
': description,
77 'thumbnail
': thumbnail,
78 'view_count
': view_count,