X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/99b46abf79266c2bf5eabc291dc4f366e61b5dde..233624c1db781ee7dabbaf88453cf18e248dd20d:/youtube_dl/extractor/tube8.py
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py
index 4053f6c..368c457 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from ..utils import (
     int_or_none,
     str_to_int,
@@ -21,12 +23,20 @@ class Tube8IE(KeezMoviesIE):
             'title': 'Kasia music video',
             'age_limit': 18,
             'duration': 230,
-        }
+            'categories': ['Teen'],
+            'tags': ['dancing'],
+        },
     }, {
         'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r']+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
+            webpage)
+
     def _real_extract(self, url):
         webpage, info = self._extract_info(url)
 
@@ -51,6 +61,17 @@ class Tube8IE(KeezMoviesIE):
             r'(\d+)',
             webpage, 'comment count', fatal=False))
 
+        category = self._search_regex(
+            r'Category:\s*\s*]+href=[^>]+>([^<]+)',
+            webpage, 'category', fatal=False)
+        categories = [category] if category else None
+
+        tags_str = self._search_regex(
+            r'(?s)Tags:\s*(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
+
         info.update({
             'description': description,
             'uploader': uploader,
@@ -58,6 +79,8 @@ class Tube8IE(KeezMoviesIE):
             'like_count': like_count,
             'dislike_count': dislike_count,
             'comment_count': comment_count,
+            'categories': categories,
+            'tags': tags,
         })
 
         return info