Fix extraction from YouTube.

[youtubedl] / youtube_dl / extractor / metacafe.py
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py

index e6e7659a1de0ebe86f48a4128192de5d14d6d586..28f59f63c836a6f16f58c85969337e669e12dcb3 100644 (file)
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -6,12 +6,12 @@ from .common import InfoExtractor
  from ..compat import (
      compat_parse_qs,
      compat_urllib_parse_unquote,
+    compat_urllib_parse_urlencode,
  )
  from ..utils import (
      determine_ext,
      ExtractorError,
      int_or_none,
-    urlencode_postdata,
      get_element_by_attribute,
      mimetype2ext,
  )
@@ -50,6 +50,21 @@ class MetacafeIE(InfoExtractor):
              },
              'skip': 'Page is temporarily unavailable.',
          },
+        # metacafe video with family filter
+        {
+            'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/',
+            'md5': 'b06082c5079bbdcde677a6291fbdf376',
+            'info_dict': {
+                'id': '2155630',
+                'ext': 'mp4',
+                'title': 'Adult Art By David Hart 156',
+                'uploader': '63346',
+                'description': 'md5:9afac8fc885252201ad14563694040fc',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
          # AnyClip video
          {
              'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
@@ -112,28 +127,12 @@ class MetacafeIE(InfoExtractor):
      def report_disclaimer(self):
          self.to_screen('Retrieving disclaimer')
  
-    def _confirm_age(self):
-        # Retrieve disclaimer
-        self.report_disclaimer()
-        self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
-
-        # Confirm age
-        self.report_age_confirmation()
-        self._download_webpage(
-            self._FILTER_POST, None, False, 'Unable to confirm age',
-            data=urlencode_postdata({
-                'filters': '0',
-                'submit': "Continue - I'm over 18",
-            }), headers={
-                'Content-Type': 'application/x-www-form-urlencoded',
-            })
-
      def _real_extract(self, url):
          # Extract id and simplified title from URL
          video_id, display_id = re.match(self._VALID_URL, url).groups()
  
          # the video may come from an external site
-        m_external = re.match('^(\w{2})-(.*)$', video_id)
+        m_external = re.match(r'^(\w{2})-(.*)$', video_id)
          if m_external is not None:
              prefix, ext_id = m_external.groups()
              # Check if video comes from YouTube
@@ -143,13 +142,15 @@ class MetacafeIE(InfoExtractor):
              if prefix == 'cb':
                  return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
  
-        # self._confirm_age()
+        headers = {
+            # Disable family filter
+            'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
+        }
  
          # AnyClip videos require the flashversion cookie so that we get the link
          # to the mp4 file
-        headers = {}
          if video_id.startswith('an-'):
-            headers['Cookie'] = 'flashVersion=0;'
+            headers['Cookie'] += 'flashVersion=0; '
  
          # Retrieve video webpage to extract further information
          webpage = self._download_webpage(url, video_id, headers=headers)