]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tudou.py
7a3891b89b736fb05f4c09d441d7eb56e68d8dcd
   6 from .common 
import InfoExtractor
 
   9 class TudouIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' 
  12         u
'url': u
'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', 
  13         u
'file': u
'159448201.f4v', 
  14         u
'md5': u
'140a49ed444bd22f93330985d8475fcb', 
  16             u
"title": u
"卡马乔国足开大脚长传冲吊集锦" 
  20         u
'url': u
'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html', 
  24             u
'title': u
'todo.mp4', 
  26         u
'add_ie': [u
'Youku'], 
  27         u
'skip': u
'Only works from China' 
  30     def _url_for_id(self
, id, quality 
= None): 
  31         info_url 
= "http://v2.tudou.com/f?id="+str(id) 
  33             info_url 
+= '&hd' + quality
 
  34         webpage 
= self
._download
_webpage
(info_url
, id, "Opening the info webpage") 
  35         final_url 
= self
._html
_search
_regex
('>(.+?)</f>',webpage
, 'video url') 
  38     def _real_extract(self
, url
): 
  39         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  40         video_id 
= mobj
.group(2) 
  41         webpage 
= self
._download
_webpage
(url
, video_id
) 
  43         m 
= re
.search(r
'vcode:\s*[\'"](.+?)[\'"]', webpage) 
  47                 'url
': u'youku
:' + m.group(1), 
  51         title = self._search_regex( 
  52             r",kw:\s*['\"](.+?
)[\"']", webpage, u'title
') 
  53         thumbnail_url = self._search_regex( 
  54             r",pic:\s*[\"'](.+?
)[\"']", webpage, u'thumbnail URL
', fatal=False) 
  56         segs_json = self._search_regex(r'segs
: \'(.*)\'', webpage, 'segments
') 
  57         segments = json.loads(segs_json) 
  58         # It looks like the keys are the arguments that have to be passed as 
  59         # the hd field in the request url, we pick the higher 
  60         quality = sorted(segments.keys())[-1] 
  61         parts = segments[quality] 
  63         len_parts = len(parts) 
  65             self.to_screen(u'%s: found 
%s parts
' % (video_id, len_parts)) 
  68             final_url = self._url_for_id(part_id, quality) 
  69             ext = (final_url.split('?
')[0]).split('.')[-1] 
  70             part_info = {'id': part_id, 
  74                           'thumbnail
': thumbnail_url, 
  76             result.append(part_info)