]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tudou.py
   3 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  11 class TudouIE(InfoExtractor
): 
  12     _VALID_URL 
= r
'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])' 
  14         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', 
  15         'md5': '140a49ed444bd22f93330985d8475fcb', 
  19             'title': '卡马乔国足开大脚长传冲吊集锦', 
  20             'thumbnail': 're:^https?://.*\.jpg$', 
  23         'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', 
  27             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', 
  28             'thumbnail': 're:^https?://.*\.jpg$', 
  32     def _url_for_id(self
, id, quality
=None): 
  33         info_url 
= "http://v2.tudou.com/f?id=" + str(id) 
  35             info_url 
+= '&hd' + quality
 
  36         webpage 
= self
._download
_webpage
(info_url
, id, "Opening the info webpage") 
  37         final_url 
= self
._html
_search
_regex
('>(.+?)</f>', webpage
, 'video url') 
  40     def _real_extract(self
, url
): 
  41         video_id 
= self
._match
_id
(url
) 
  42         webpage 
= self
._download
_webpage
(url
, video_id
) 
  44         m 
= re
.search(r
'vcode:\s*[\'"](.+?)[\'"]', webpage) 
  48                 'url
': 'youku
:' + m.group(1), 
  52         title = self._search_regex( 
  53             r",kw:\s*['\"](.+?
)[\"']", webpage, 'title
') 
  54         thumbnail_url = self._search_regex( 
  55             r",pic:\s*[\"'](.+?
)[\"']", webpage, 'thumbnail URL
', fatal=False) 
  57         segs_json = self._search_regex(r'segs
: \'(.*)\'', webpage, 'segments
') 
  58         segments = json.loads(segs_json) 
  59         # It looks like the keys are the arguments that have to be passed as 
  60         # the hd field in the request url, we pick the higher 
  61         # Also, filter non-number qualities (see issue #3643). 
  62         quality = sorted(filter(lambda k: k.isdigit(), segments.keys()), 
  63                          key=lambda k: int(k))[-1] 
  64         parts = segments[quality] 
  66         len_parts = len(parts) 
  68             self.to_screen('%s: found 
%s parts
' % (video_id, len_parts)) 
  71             final_url = self._url_for_id(part_id, quality) 
  72             ext = (final_url.split('?
')[0]).split('.')[-1] 
  78                 'thumbnail
': thumbnail_url, 
  80             result.append(part_info) 
  83             '_type
': 'multi_video
',