X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/38bb9b1b0a044cabaf5691553815e334cd2e9213..7b550e005da7fd498cfbf1b7c04b05e5540eb6fc:/youtube_dl/extractor/drtuber.py?ds=sidebyside
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
index 639f918..5c41c80 100644
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@@ -3,12 +3,15 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import str_to_int
+from ..utils import (
+ NO_DEFAULT,
+ str_to_int,
+)
class DrTuberIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
+ _TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
'info_dict': {
@@ -17,49 +20,82 @@ class DrTuberIE(InfoExtractor):
'ext': 'mp4',
'title': 'hot perky blonde naked golf',
'like_count': int,
- 'dislike_count': int,
'comment_count': int,
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.*\.jpg$',
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://www.drtuber.com/embed/489939',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'
', r'([^<]+) - \d+'],
+ (r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
+ r'<title>([^<]+)\s*@\s+DrTuber',
+ r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
+ r'<p[^>]+class="title_substrate">([^<]+)</p>',
+ r'<title>([^<]+) - \d+'),
webpage, 'title')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"',
webpage, 'thumbnail', fatal=False)
- def extract_count(id_, name):
+ def extract_count(id_, name, default=NO_DEFAULT):
return str_to_int(self._html_search_regex(
r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
- webpage, '%s count' % name, fatal=False))
+ webpage, '%s count' % name, default=default, fatal=False))
like_count = extract_count('rate_likes', 'like')
- dislike_count = extract_count('rate_dislikes', 'dislike')
+ dislike_count = extract_count('rate_dislikes', 'dislike', default=None)
comment_count = extract_count('comments_count', 'comment')
cats_str = self._search_regex(
- r']+class="categories_list">(.+?)
', webpage, 'categories', fatal=False)
- categories = [] if not cats_str else re.findall(r']+class="categories_list">(.+?)',
+ webpage, 'categories', fatal=False)
+ categories = [] if not cats_str else re.findall(
+ r'