]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tudou.py
   6 from .common 
import InfoExtractor
 
   9 class TudouIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' 
  12         u
'url': u
'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', 
  13         u
'file': u
'159448201.f4v', 
  14         u
'md5': u
'140a49ed444bd22f93330985d8475fcb', 
  16             u
"title": u
"卡马乔国足开大脚长传冲吊集锦" 
  20     def _url_for_id(self
, id, quality 
= None): 
  21         info_url 
= "http://v2.tudou.com/f?id="+str(id) 
  23             info_url 
+= '&hd' + quality
 
  24         webpage 
= self
._download
_webpage
(info_url
, id, "Opening the info webpage") 
  25         final_url 
= self
._html
_search
_regex
('>(.+?)</f>',webpage
, 'video url') 
  28     def _real_extract(self
, url
): 
  29         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  30         video_id 
= mobj
.group(2) 
  31         webpage 
= self
._download
_webpage
(url
, video_id
) 
  32         title 
= re
.search(",kw:\"(.+)\"",webpage
) 
  34             title 
= re
.search(",kw: \'(.+)\'",webpage
) 
  35         title 
= title
.group(1) 
  36         thumbnail_url 
= re
.search(",pic: \'(.+?)\'",webpage
) 
  37         if thumbnail_url 
is None: 
  38             thumbnail_url 
= re
.search(",pic:\"(.+?)\"",webpage
) 
  39         thumbnail_url 
= thumbnail_url
.group(1) 
  41         segs_json 
= self
._search
_regex
(r
'segs: \'(.*)\'', webpage, 'segments
') 
  42         segments = json.loads(segs_json) 
  43         # It looks like the keys are the arguments that have to be passed as 
  44         # the hd field in the request url, we pick the higher 
  45         quality = sorted(segments.keys())[-1] 
  46         parts = segments[quality] 
  48         len_parts = len(parts) 
  50             self.to_screen(u'%s: found 
%s parts
' % (video_id, len_parts)) 
  53             final_url = self._url_for_id(part_id, quality) 
  54             ext = (final_url.split('?
')[0]).split('.')[-1] 
  55             part_info = {'id': part_id, 
  59                           'thumbnail
': thumbnail_url, 
  61             result.append(part_info)