X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/99b46abf79266c2bf5eabc291dc4f366e61b5dde..8cecfc7b1c0dca62d6d3bf03a7b6deae8b909286:/youtube_dl/extractor/tube8.py diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 4053f6c..db93b01 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from ..utils import ( int_or_none, str_to_int, @@ -21,12 +23,20 @@ class Tube8IE(KeezMoviesIE): 'title': 'Kasia music video', 'age_limit': 18, 'duration': 230, - } + 'categories': ['Teen'], + 'tags': ['dancing'], + }, }, { 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)', + webpage) + def _real_extract(self, url): webpage, info = self._extract_info(url) @@ -35,7 +45,7 @@ class Tube8IE(KeezMoviesIE): r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') description = self._html_search_regex( - r'>Description:\s*(.+?)\s*<', webpage, 'description', fatal=False) + r'(?s)Description:\s*
(.+?)
', webpage, 'description', fatal=False) uploader = self._html_search_regex( r'\s*(.+?)\s*<', webpage, 'uploader', fatal=False) @@ -45,12 +55,23 @@ class Tube8IE(KeezMoviesIE): dislike_count = int_or_none(self._search_regex( r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False)) view_count = str_to_int(self._search_regex( - r'Views: ([\d,\.]+)\s*', + r'Views:\s*\s*
([\d,\.]+)', webpage, 'view count', fatal=False)) comment_count = str_to_int(self._search_regex( r'(\d+)', webpage, 'comment count', fatal=False)) + category = self._search_regex( + r'Category:\s*\s*
\s*]+href=[^>]+>([^<]+)', + webpage, 'category', fatal=False) + categories = [category] if category else None + + tags_str = self._search_regex( + r'(?s)Tags:\s*\s*
(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None + info.update({ 'description': description, 'uploader': uploader, @@ -58,6 +79,8 @@ class Tube8IE(KeezMoviesIE): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, + 'categories': categories, + 'tags': tags, }) return info