]> Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/twentyfourvideo.py
d/rules: Run offline tests.
[youtubedl] / youtube_dl / extractor / twentyfourvideo.py
index 67e8bfea03476ccf78d2470a973655f2a7213730..1d66eeaff6e80cd1c629f79ad98a1a68f1d52564 100644 (file)
 # coding: utf-8
 from __future__ import unicode_literals
 
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
     int_or_none,
 from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
     int_or_none,
+    xpath_attr,
+    xpath_element,
 )
 
 
 class TwentyFourVideoIE(InfoExtractor):
     IE_NAME = '24video'
 )
 
 
 class TwentyFourVideoIE(InfoExtractor):
     IE_NAME = '24video'
-    _VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
-
-    _TESTS = [
-        {
-            'url': 'http://www.24video.net/video/view/1044982',
-            'md5': '48dd7646775690a80447a8dca6a2df76',
-            'info_dict': {
-                'id': '1044982',
-                'ext': 'mp4',
-                'title': 'Эротика каменного века',
-                'description': 'Как смотрели порно в каменном веке.',
-                'thumbnail': 're:^https?://.*\.jpg$',
-                'uploader': 'SUPERTELO',
-                'duration': 31,
-                'timestamp': 1275937857,
-                'upload_date': '20100607',
-                'age_limit': 18,
-                'like_count': int,
-                'dislike_count': int,
-            },
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?P<host>
+                            (?:(?:www|porno)\.)?24video\.
+                            (?:net|me|xxx|sexy?|tube|adult|site)
+                        )/
+                        (?:
+                            video/(?:(?:view|xml)/)?|
+                            player/new24_play\.swf\?id=
+                        )
+                        (?P<id>\d+)
+                    '''
+
+    _TESTS = [{
+        'url': 'http://www.24video.net/video/view/1044982',
+        'md5': 'e09fc0901d9eaeedac872f154931deeb',
+        'info_dict': {
+            'id': '1044982',
+            'ext': 'mp4',
+            'title': 'Эротика каменного века',
+            'description': 'Как смотрели порно в каменном веке.',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'SUPERTELO',
+            'duration': 31,
+            'timestamp': 1275937857,
+            'upload_date': '20100607',
+            'age_limit': 18,
+            'like_count': int,
+            'dislike_count': int,
         },
         },
-        {
-            'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
-            'only_matching': True,
-        }
-    ]
+    }, {
+        'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.24video.me/video/view/1044982',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.24video.tube/video/view/2363750',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.24video.site/video/view/2640421',
+        'only_matching': True,
+    }, {
+        'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
 
         webpage = self._download_webpage(
 
         webpage = self._download_webpage(
-            'http://www.24video.net/video/view/%s' % video_id, video_id)
+            'http://%s/video/view/%s' % (host, video_id), video_id)
 
         title = self._og_search_title(webpage)
         description = self._html_search_regex(
 
         title = self._og_search_title(webpage)
         description = self._html_search_regex(
-            r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
+            r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
+            webpage, 'description', fatal=False, group='description')
         thumbnail = self._og_search_thumbnail(webpage)
         duration = int_or_none(self._og_search_property(
             'duration', webpage, 'duration', fatal=False))
         timestamp = parse_iso8601(self._search_regex(
         thumbnail = self._og_search_thumbnail(webpage)
         duration = int_or_none(self._og_search_property(
             'duration', webpage, 'duration', fatal=False))
         timestamp = parse_iso8601(self._search_regex(
-            r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
-            webpage, 'upload date'))
+            r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
+            webpage, 'upload date', fatal=False))
 
         uploader = self._html_search_regex(
 
         uploader = self._html_search_regex(
-            r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>',
+            r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
             webpage, 'uploader', fatal=False)
 
         view_count = int_or_none(self._html_search_regex(
             r'<span class="video-views">(\d+) просмотр',
             webpage, 'view count', fatal=False))
         comment_count = int_or_none(self._html_search_regex(
             webpage, 'uploader', fatal=False)
 
         view_count = int_or_none(self._html_search_regex(
             r'<span class="video-views">(\d+) просмотр',
             webpage, 'view count', fatal=False))
         comment_count = int_or_none(self._html_search_regex(
-            r'<div class="comments-title" id="comments-count">(\d+) комментари',
-            webpage, 'comment count', fatal=False))
-
-        formats = []
-
-        pc_video = self._download_xml(
-            'http://www.24video.net/video/xml/%s?mode=play' % video_id,
-            video_id, 'Downloading PC video URL').find('.//video')
+            r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
+            webpage, 'comment count', default=None))
 
 
-        formats.append({
-            'url': pc_video.attrib['url'],
-            'format_id': 'pc',
-            'quality': 1,
-        })
+        # Sets some cookies
+        self._download_xml(
+            r'http://%s/video/xml/%s?mode=init' % (host, video_id),
+            video_id, 'Downloading init XML')
 
 
-        like_count = int_or_none(pc_video.get('ratingPlus'))
-        dislike_count = int_or_none(pc_video.get('ratingMinus'))
-        age_limit = 18 if pc_video.get('adult') == 'true' else 0
+        video_xml = self._download_xml(
+            'http://%s/video/xml/%s?mode=play' % (host, video_id),
+            video_id, 'Downloading video XML')
 
 
-        mobile_video = self._download_xml(
-            'http://www.24video.net/video/xml/%s' % video_id,
-            video_id, 'Downloading mobile video URL').find('.//video')
+        video = xpath_element(video_xml, './/video', 'video', fatal=True)
 
 
-        formats.append({
-            'url': mobile_video.attrib['url'],
-            'format_id': 'mobile',
-            'quality': 0,
-        })
+        formats = [{
+            'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
+        }]
 
 
-        self._sort_formats(formats)
+        like_count = int_or_none(video.get('ratingPlus'))
+        dislike_count = int_or_none(video.get('ratingMinus'))
+        age_limit = 18 if video.get('adult') == 'true' else 0
 
         return {
             'id': video_id,
 
         return {
             'id': video_id,