Update README.md

[youtubedl] / youtube_dl / extractor / twentyfourvideo.py
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py

index f3541b6540c2b772afbc2d89d097f75ac531a2b3..74d14049b482a702bf464a40f2e5f361dc7cd72a 100644 (file)
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -1,6 +1,8 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  from ..utils import (
      parse_iso8601,
@@ -12,7 +14,18 @@ from ..utils import (
  
  class TwentyFourVideoIE(InfoExtractor):
      IE_NAME = '24video'
-    _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?P<host>
+                            (?:(?:www|porno?)\.)?24video\.
+                            (?:net|me|xxx|sexy?|tube|adult|site|vip)
+                        )/
+                        (?:
+                            video/(?:(?:view|xml)/)?|
+                            player/new24_play\.swf\?id=
+                        )
+                        (?P<id>\d+)
+                    '''
  
      _TESTS = [{
          'url': 'http://www.24video.net/video/view/1044982',
@@ -40,13 +53,27 @@ class TwentyFourVideoIE(InfoExtractor):
      }, {
          'url': 'http://www.24video.tube/video/view/2363750',
          'only_matching': True,
+    }, {
+        'url': 'https://www.24video.site/video/view/2640421',
+        'only_matching': True,
+    }, {
+        'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.24video.vip/video/view/1044982',
+        'only_matching': True,
+    }, {
+        'url': 'https://porn.24video.net/video/2640421-vsya-takay',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
  
          webpage = self._download_webpage(
-            'http://www.24video.sex/video/view/%s' % video_id, video_id)
+            'http://%s/video/view/%s' % (host, video_id), video_id)
  
          title = self._og_search_title(webpage)
          description = self._html_search_regex(
@@ -56,8 +83,8 @@ class TwentyFourVideoIE(InfoExtractor):
          duration = int_or_none(self._og_search_property(
              'duration', webpage, 'duration', fatal=False))
          timestamp = parse_iso8601(self._search_regex(
-            r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
-            webpage, 'upload date'))
+            r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
+            webpage, 'upload date', fatal=False))
  
          uploader = self._html_search_regex(
              r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
@@ -68,15 +95,15 @@ class TwentyFourVideoIE(InfoExtractor):
              webpage, 'view count', fatal=False))
          comment_count = int_or_none(self._html_search_regex(
              r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
-            webpage, 'comment count', fatal=False))
+            webpage, 'comment count', default=None))
  
          # Sets some cookies
          self._download_xml(
-            r'http://www.24video.sex/video/xml/%s?mode=init' % video_id,
+            r'http://%s/video/xml/%s?mode=init' % (host, video_id),
              video_id, 'Downloading init XML')
  
          video_xml = self._download_xml(
-            'http://www.24video.sex/video/xml/%s?mode=play' % video_id,
+            'http://%s/video/xml/%s?mode=play' % (host, video_id),
              video_id, 'Downloading video XML')
  
          video = xpath_element(video_xml, './/video', 'video', fatal=True)