]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/drtuber.py
37c5c181f799efd8ee69d850c0b6076130c64073
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..utils 
import str_to_int
 
   9 class DrTuberIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)' 
  12         'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 
  13         'md5': '93e680cf2536ad0dfb7e74d94a89facd', 
  16             'display_id': 'hot-perky-blonde-naked-golf', 
  18             'title': 'hot perky blonde naked golf', 
  22             'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], 
  23             'thumbnail': 're:https?://.*\.jpg$', 
  28     def _real_extract(self
, url
): 
  29         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  30         video_id 
= mobj
.group('id') 
  31         display_id 
= mobj
.group('display_id') 
  33         webpage 
= self
._download
_webpage
(url
, display_id
) 
  35         video_url 
= self
._html
_search
_regex
( 
  36             r
'<source src="([^"]+)"', webpage
, 'video URL') 
  38         title 
= self
._html
_search
_regex
( 
  39             [r
'class="hd_title" style="[^"]+">([^<]+)</h1>', r
'<title>([^<]+) - \d+'], 
  42         thumbnail 
= self
._html
_search
_regex
( 
  44             webpage
, 'thumbnail', fatal
=False) 
  46         like_count 
= str_to_int(self
._html
_search
_regex
( 
  47             r
'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>', 
  48             webpage
, 'like count', fatal
=False)) 
  49         dislike_count 
= str_to_int(self
._html
_search
_regex
( 
  50             r
'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>', 
  51             webpage
, 'like count', fatal
=False)) 
  52         comment_count 
= str_to_int(self
._html
_search
_regex
( 
  53             r
'<span class="comments_count">([\d,\.]+)</span>', 
  54             webpage
, 'comment count', fatal
=False)) 
  56         cats_str 
= self
._search
_regex
( 
  57             r
'<span>Categories:</span><div>(.+?)</div>', webpage
, 'categories', fatal
=False) 
  58         categories 
= [] if not cats_str 
else re
.findall(r
'<a title="([^"]+)"', cats_str
) 
  62             'display_id': display_id
, 
  65             'thumbnail': thumbnail
, 
  66             'like_count': like_count
, 
  67             'dislike_count': dislike_count
, 
  68             'comment_count': comment_count
, 
  69             'categories': categories
, 
  70             'age_limit': self
._rta
_search
(webpage
),