]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/drtuber.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..utils 
import str_to_int
 
   9 class DrTuberIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)' 
  12         'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 
  13         'md5': '93e680cf2536ad0dfb7e74d94a89facd', 
  16             'display_id': 'hot-perky-blonde-naked-golf', 
  18             'title': 'Hot Perky Blonde Naked Golf', 
  22             'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], 
  23             'thumbnail': 're:https?://.*\.jpg$', 
  28     def _real_extract(self
, url
): 
  29         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  30         video_id 
= mobj
.group('id') 
  31         display_id 
= mobj
.group('display_id') 
  33         webpage 
= self
._download
_webpage
(url
, display_id
) 
  35         video_url 
= self
._html
_search
_regex
( 
  36             r
'<source src="([^"]+)"', webpage
, 'video URL') 
  38         title 
= self
._html
_search
_regex
( 
  39             r
'<title>([^<]+)\s*-\s*Free', webpage
, 'title') 
  41         thumbnail 
= self
._html
_search
_regex
( 
  43             webpage
, 'thumbnail', fatal
=False) 
  45         like_count 
= str_to_int(self
._html
_search
_regex
( 
  46             r
'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>', 
  47             webpage
, 'like count', fatal
=False)) 
  48         dislike_count 
= str_to_int(self
._html
_search
_regex
( 
  49             r
'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>', 
  50             webpage
, 'like count', fatal
=False)) 
  51         comment_count 
= str_to_int(self
._html
_search
_regex
( 
  52             r
'<span class="comments_count">([\d,\.]+)</span>', 
  53             webpage
, 'comment count', fatal
=False)) 
  55         cats_str 
= self
._search
_regex
( 
  56             r
'<span>Categories:</span><div>(.+?)</div>', webpage
, 'categories', fatal
=False) 
  57         categories 
= [] if not cats_str 
else re
.findall(r
'<a title="([^"]+)"', cats_str
) 
  61             'display_id': display_id
, 
  64             'thumbnail': thumbnail
, 
  65             'like_count': like_count
, 
  66             'dislike_count': dislike_count
, 
  67             'comment_count': comment_count
, 
  68             'categories': categories
, 
  69             'age_limit': self
._rta
_search
(webpage
),