]> Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/googleplus.py
debian/changelog: Annotate with bugs being closed.
[youtubedl] / youtube_dl / extractor / googleplus.py
index ff2cdeebb4e9dde5f1ff8e3a7c7bb0c00b48f048..cc29a7e5df2059096afe89ecf7175f317a755e94 100644 (file)
@@ -1,3 +1,6 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
 import datetime
 import re
 
 import datetime
 import re
 
@@ -8,30 +11,36 @@ from ..utils import (
 
 
 class GooglePlusIE(InfoExtractor):
 
 
 class GooglePlusIE(InfoExtractor):
-    """Information extractor for plus.google.com."""
-
-    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
-    IE_NAME = u'plus.google'
+    IE_DESC = 'Google Plus'
+    _VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
+    IE_NAME = 'plus.google'
+    _TEST = {
+        'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
+        'info_dict': {
+            'id': 'ZButuJc6CtH',
+            'ext': 'flv',
+            'upload_date': '20120613',
+            'uploader': '井上ヨシマサ',
+            'title': '嘆きの天使 降臨',
+        }
+    }
 
     def _real_extract(self, url):
         # Extract id from URL
         mobj = re.match(self._VALID_URL, url)
 
     def _real_extract(self, url):
         # Extract id from URL
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
 
 
-        post_url = mobj.group(0)
-        video_id = mobj.group(1)
-
-        video_extension = 'flv'
+        video_id = mobj.group('id')
 
         # Step 1, Retrieve post webpage to extract further information
 
         # Step 1, Retrieve post webpage to extract further information
-        webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
+        webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
 
         self.report_extraction(video_id)
 
         # Extract update date
 
         self.report_extraction(video_id)
 
         # Extract update date
-        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
-            webpage, u'upload date', fatal=False)
+        upload_date = self._html_search_regex(
+            r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
+                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
+            webpage, 'upload date', fatal=False, flags=re.VERBOSE)
         if upload_date:
             # Convert timestring to a format suitable for filename
             upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
         if upload_date:
             # Convert timestring to a format suitable for filename
             upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
@@ -39,28 +48,27 @@ class GooglePlusIE(InfoExtractor):
 
         # Extract uploader
         uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
 
         # Extract uploader
         uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
-            webpage, u'uploader', fatal=False)
+            webpage, 'uploader', fatal=False)
 
         # Extract title
         # Get the first line for title
         video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
 
         # Extract title
         # Get the first line for title
         video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
-            webpage, 'title', default=u'NA')
+            webpage, 'title', default='NA')
 
         # Step 2, Simulate clicking the image box to launch video
 
         # Step 2, Simulate clicking the image box to launch video
-        DOMAIN = 'https://plus.google.com'
-        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
-            webpage, u'video page URL')
+        DOMAIN = 'https://plus.google.com/'
+        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
+            webpage, 'video page URL')
         if not video_page.startswith(DOMAIN):
             video_page = DOMAIN + video_page
 
         if not video_page.startswith(DOMAIN):
             video_page = DOMAIN + video_page
 
-        webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
+        webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
 
 
-        # Extract video links on video page
-        """Extract video links of all sizes"""
+        # Extract video links all sizes
         pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
         mobj = re.findall(pattern, webpage)
         if len(mobj) == 0:
         pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
         mobj = re.findall(pattern, webpage)
         if len(mobj) == 0:
-            raise ExtractorError(u'Unable to extract video links')
+            raise ExtractorError('Unable to extract video links')
 
         # Sort in resolution
         links = sorted(mobj)
 
         # Sort in resolution
         links = sorted(mobj)
@@ -75,12 +83,11 @@ class GooglePlusIE(InfoExtractor):
         except AttributeError: # Python 3
             video_url = bytes(video_url, 'ascii').decode('unicode-escape')
 
         except AttributeError: # Python 3
             video_url = bytes(video_url, 'ascii').decode('unicode-escape')
 
-
-        return [{
-            'id':       video_id,
-            'url':      video_url,
+        return {
+            'id': video_id,
+            'url': video_url,
             'uploader': uploader,
             'uploader': uploader,
-            'upload_date':  upload_date,
-            'title':    video_title,
-            'ext':      video_extension,
-        }]
+            'upload_date': upload_date,
+            'title': video_title,
+            'ext': 'flv',
+        }