Imported Upstream version 2013.12.04

[youtubedl] / youtube_dl / extractor / googleplus.py
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py

index ff2cdeebb4e9dde5f1ff8e3a7c7bb0c00b48f048..2570746b2047a1d1ae0a60b48970b1414f168e40 100644 (file)
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -1,3 +1,5 @@
+# coding: utf-8
+
  import datetime
  import re
  
@@ -8,10 +10,18 @@ from ..utils import (
  
  
  class GooglePlusIE(InfoExtractor):
-    """Information extractor for plus.google.com."""
-
+    IE_DESC = u'Google Plus'
      _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
      IE_NAME = u'plus.google'
+    _TEST = {
+        u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
+        u"file": u"ZButuJc6CtH.flv",
+        u"info_dict": {
+            u"upload_date": u"20120613",
+            u"uploader": u"井上ヨシマサ",
+            u"title": u"嘆きの天使 降臨"
+        }
+    }
  
      def _real_extract(self, url):
          # Extract id from URL
@@ -30,8 +40,10 @@ class GooglePlusIE(InfoExtractor):
          self.report_extraction(video_id)
  
          # Extract update date
-        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
-            webpage, u'upload date', fatal=False)
+        upload_date = self._html_search_regex(
+            r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
+                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
+            webpage, u'upload date', fatal=False, flags=re.VERBOSE)
          if upload_date:
              # Convert timestring to a format suitable for filename
              upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
@@ -47,8 +59,8 @@ class GooglePlusIE(InfoExtractor):
              webpage, 'title', default=u'NA')
  
          # Step 2, Simulate clicking the image box to launch video
-        DOMAIN = 'https://plus.google.com'
-        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
+        DOMAIN = 'https://plus.google.com/'
+        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
              webpage, u'video page URL')
          if not video_page.startswith(DOMAIN):
              video_page = DOMAIN + video_page