Imported Upstream version 2016.08.17

[youtubedl] / youtube_dl / extractor / motherless.py
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py

index 3621ff99e76da1bffabda1a81f6181fd4a6ed61c..5e1a8a71a93aa28962d7f260af966d10cf8e9f7a 100644 (file)
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -5,62 +5,73 @@ import re
  
  from .common import InfoExtractor
  from ..utils import (
  
  from .common import InfoExtractor
  from ..utils import (
+    ExtractorError,
      str_to_int,
      unified_strdate,
  )
  
  
  class MotherlessIE(InfoExtractor):
      str_to_int,
      unified_strdate,
  )
  
  
  class MotherlessIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
-    _TESTS = [
-        {
-            'url': 'http://motherless.com/AC3FFE1',
-            'md5': '310f62e325a9fafe64f68c0bccb6e75f',
-            'info_dict': {
-                'id': 'AC3FFE1',
-                'ext': 'mp4',
-                'title': 'Fucked in the ass while playing PS3',
-                'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
-                'upload_date': '20100913',
-                'uploader_id': 'famouslyfuckedup',
-                'thumbnail': 're:http://.*\.jpg',
-                'age_limit': 18,
-            }
-        },
-        {
-            'url': 'http://motherless.com/532291B',
-            'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
-            'info_dict': {
-                'id': '532291B',
-                'ext': 'mp4',
-                'title': 'Amazing girl playing the omegle game, PERFECT!',
-                'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
-                'upload_date': '20140622',
-                'uploader_id': 'Sulivana7x',
-                'thumbnail': 're:http://.*\.jpg',
-                'age_limit': 18,
-            }
+    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
+    _TESTS = [{
+        'url': 'http://motherless.com/AC3FFE1',
+        'md5': '310f62e325a9fafe64f68c0bccb6e75f',
+        'info_dict': {
+            'id': 'AC3FFE1',
+            'ext': 'mp4',
+            'title': 'Fucked in the ass while playing PS3',
+            'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
+            'upload_date': '20100913',
+            'uploader_id': 'famouslyfuckedup',
+            'thumbnail': 're:http://.*\.jpg',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'http://motherless.com/532291B',
+        'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
+        'info_dict': {
+            'id': '532291B',
+            'ext': 'mp4',
+            'title': 'Amazing girl playing the omegle game, PERFECT!',
+            'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
+                           'game', 'hairy'],
+            'upload_date': '20140622',
+            'uploader_id': 'Sulivana7x',
+            'thumbnail': 're:http://.*\.jpg',
+            'age_limit': 18,
          },
          },
-        {
-            'url': 'http://motherless.com/g/cosplay/633979F',
-            'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
-            'info_dict': {
-                'id': '633979F',
-                'ext': 'mp4',
-                'title': 'Turtlette',
-                'categories': ['superheroine heroine  superher'],
-                'upload_date': '20140827',
-                'uploader_id': 'shade0230',
-                'thumbnail': 're:http://.*\.jpg',
-                'age_limit': 18,
-            }
+        'skip': '404',
+    }, {
+        'url': 'http://motherless.com/g/cosplay/633979F',
+        'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
+        'info_dict': {
+            'id': '633979F',
+            'ext': 'mp4',
+            'title': 'Turtlette',
+            'categories': ['superheroine heroine  superher'],
+            'upload_date': '20140827',
+            'uploader_id': 'shade0230',
+            'thumbnail': 're:http://.*\.jpg',
+            'age_limit': 18,
          }
          }
-    ]
+    }, {
+        # no keywords
+        'url': 'http://motherless.com/8B4BBC1',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
+        if any(p in webpage for p in (
+                '<title>404 - MOTHERLESS.COM<',
+                ">The page you're looking for cannot be found.<")):
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        if '>The content you are trying to view is for friends only.' in webpage:
+            raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
+
          title = self._html_search_regex(
              r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
          video_url = self._html_search_regex(
          title = self._html_search_regex(
              r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
          video_url = self._html_search_regex(
@@ -72,7 +83,7 @@ class MotherlessIE(InfoExtractor):
          like_count = str_to_int(self._html_search_regex(
              r'<strong>Favorited</strong>\s+([^<]+)<',
              webpage, 'like count', fatal=False))
          like_count = str_to_int(self._html_search_regex(
              r'<strong>Favorited</strong>\s+([^<]+)<',
              webpage, 'like count', fatal=False))
- 
+
          upload_date = self._html_search_regex(
              r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
          if 'Ago' in upload_date:
          upload_date = self._html_search_regex(
              r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
          if 'Ago' in upload_date:
@@ -86,7 +97,7 @@ class MotherlessIE(InfoExtractor):
              r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
              webpage, 'uploader_id')
  
              r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
              webpage, 'uploader_id')
  
-        categories = self._html_search_meta('keywords', webpage)
+        categories = self._html_search_meta('keywords', webpage, default=None)
          if categories:
              categories = [cat.strip() for cat in categories.split(',')]
  
          if categories:
              categories = [cat.strip() for cat in categories.split(',')]