]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/googleplus.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
   8 from ..utils 
import unified_strdate
 
  11 class GooglePlusIE(InfoExtractor
): 
  12     IE_DESC 
= 'Google Plus' 
  13     _VALID_URL 
= r
'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' 
  14     IE_NAME 
= 'plus.google' 
  16         'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH', 
  21             'upload_date': '20120613', 
  26     def _real_extract(self
, url
): 
  27         video_id 
= self
._match
_id
(url
) 
  29         # Step 1, Retrieve post webpage to extract further information 
  30         webpage 
= self
._download
_webpage
(url
, video_id
, 'Downloading entry webpage') 
  32         title 
= self
._og
_search
_description
(webpage
).splitlines()[0] 
  33         upload_date 
= unified_strdate(self
._html
_search
_regex
( 
  34             r
'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*> 
  35                     ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''', 
  36             webpage
, 'upload date', fatal
=False, flags
=re
.VERBOSE
)) 
  37         uploader 
= self
._html
_search
_regex
( 
  38             r
'rel="author".*?>(.*?)</a>', webpage
, 'uploader', fatal
=False) 
  40         # Step 2, Simulate clicking the image box to launch video 
  41         DOMAIN 
= 'https://plus.google.com/' 
  42         video_page 
= self
._search
_regex
( 
  43             r
'<a href="((?:%s)?photos/.*?)"' % re
.escape(DOMAIN
), 
  44             webpage
, 'video page URL') 
  45         if not video_page
.startswith(DOMAIN
): 
  46             video_page 
= DOMAIN 
+ video_page
 
  48         webpage 
= self
._download
_webpage
(video_page
, video_id
, 'Downloading video page') 
  50         def unicode_escape(s
): 
  51             decoder 
= codecs
.getdecoder('unicode_escape') 
  53                 r
'\\u[0-9a-fA-F]{4,}', 
  54                 lambda m
: decoder(m
.group(0))[0], 
  57         # Extract video links all sizes 
  59             'url': unicode_escape(video_url
), 
  62             'height': int(height
), 
  63         } for width
, height
, video_url 
in re
.findall( 
  64             r
'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage
)] 
  65         self
._sort
_formats
(formats
) 
  71             'upload_date': upload_date
,