]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/weibo.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  10 from ..compat 
import ( 
  21 class WeiboIE(InfoExtractor
): 
  22     _VALID_URL 
= r
'https?://weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)' 
  24         'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment', 
  28             'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博', 
  32     def _real_extract(self
, url
): 
  33         video_id 
= self
._match
_id
(url
) 
  34         # to get Referer url for genvisitor 
  35         webpage
, urlh 
= self
._download
_webpage
_handle
(url
, video_id
) 
  37         visitor_url 
= urlh
.geturl() 
  39         if 'passport.weibo.com' in visitor_url
: 
  41             visitor_data 
= self
._download
_json
( 
  42                 'https://passport.weibo.com/visitor/genvisitor', video_id
, 
  43                 note
='Generating first-visit data', 
  44                 transform_source
=strip_jsonp
, 
  45                 headers
={'Referer': visitor_url
}, 
  46                 data
=urlencode_postdata({ 
  50                         'browser': 'Gecko57,0,0,0', 
  52                         'screenInfo': '1440*900*24', 
  57             tid 
= visitor_data
['data']['tid'] 
  58             cnfd 
= '%03d' % visitor_data
['data']['confidence'] 
  60             self
._download
_webpage
( 
  61                 'https://passport.weibo.com/visitor/visitor', video_id
, 
  62                 note
='Running first-visit callback', 
  70                     '_rand': random
.random(), 
  73             webpage 
= self
._download
_webpage
( 
  74                 url
, video_id
, note
='Revisiting webpage') 
  76         title 
= self
._html
_search
_regex
( 
  77             r
'<title>(.+?)</title>', webpage
, 'title') 
  79         video_formats 
= compat_parse_qs(self
._search
_regex
( 
  80             r
'video-sources=\\\"(.+?)\"', webpage
, 'video_sources')) 
  83         supported_resolutions 
= (480, 720) 
  84         for res 
in supported_resolutions
: 
  85             vid_urls 
= video_formats
.get(compat_str(res
)) 
  86             if not vid_urls 
or not isinstance(vid_urls
, list): 
  95         self
._sort
_formats
(formats
) 
  97         uploader 
= self
._og
_search
_property
( 
  98             'nick-name', webpage
, 'uploader', default
=None) 
 103             'uploader': uploader
, 
 108 class WeiboMobileIE(InfoExtractor
): 
 109     _VALID_URL 
= r
'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?' 
 111         'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0', 
 113             'id': '4189191225395228', 
 115             'title': '午睡当然是要甜甜蜜蜜的啦', 
 120     def _real_extract(self
, url
): 
 121         video_id 
= self
._match
_id
(url
) 
 122         # to get Referer url for genvisitor 
 123         webpage 
= self
._download
_webpage
(url
, video_id
, note
='visit the page') 
 125         weibo_info 
= self
._parse
_json
(self
._search
_regex
( 
 126             r
'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};', 
 127             webpage
, 'js_code', flags
=re
.DOTALL
), 
 128             video_id
, transform_source
=js_to_json
) 
 130         status_data 
= weibo_info
.get('status', {}) 
 131         page_info 
= status_data
.get('page_info') 
 132         title 
= status_data
['status_title'] 
 133         uploader 
= status_data
.get('user', {}).get('screen_name') 
 138             'uploader': uploader
, 
 139             'url': page_info
['media_info']['stream_url']