]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tudou.py
5f7ac4b35b6c4576e5fb0998d56eb21ec522b58a
   3 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_str
 
   9 class TudouIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])' 
  12         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', 
  13         'md5': '140a49ed444bd22f93330985d8475fcb', 
  17             'title': '卡马乔国足开大脚长传冲吊集锦', 
  18             'thumbnail': 're:^https?://.*\.jpg$', 
  21         'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', 
  25             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', 
  26             'thumbnail': 're:^https?://.*\.jpg$', 
  29         'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html', 
  30         'only_matching': True, 
  33     _PLAYER_URL 
= 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' 
  35     def _url_for_id(self
, video_id
, quality
=None): 
  36         info_url 
= 'http://v2.tudou.com/f?id=' + compat_str(video_id
) 
  38             info_url 
+= '&hd' + quality
 
  39         xml_data 
= self
._download
_xml
(info_url
, video_id
, "Opening the info XML page") 
  40         final_url 
= xml_data
.text
 
  43     def _real_extract(self
, url
): 
  44         video_id 
= self
._match
_id
(url
) 
  45         webpage 
= self
._download
_webpage
(url
, video_id
) 
  47         youku_vcode 
= self
._search
_regex
( 
  48             r
'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None) 
  50             return self.url_result('youku:' + youku_vcode, ie='Youku') 
  52         title = self._search_regex( 
  53             r',kw\s*:\s*[\'"]([^
\'"]+)[\'"]', webpage, 'title
') 
  54         thumbnail_url = self._search_regex( 
  55             r',pic\s
*:\s
*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False) 
  57         player_url = self._search_regex( 
  58             r'playerUrl\s*:\s*[\'"]([^
\'"]+\.swf)[\'"]', 
  59             webpage, 'player URL
', default=self._PLAYER_URL) 
  61         segments = self._parse_json(self._search_regex( 
  62             r'segs
: \'([^
\']+)\'', webpage, 'segments
'), video_id) 
  63         # It looks like the keys are the arguments that have to be passed as 
  64         # the hd field in the request url, we pick the higher 
  65         # Also, filter non-number qualities (see issue #3643). 
  66         quality = sorted(filter(lambda k: k.isdigit(), segments.keys()), 
  67                          key=lambda k: int(k))[-1] 
  68         parts = segments[quality] 
  70         len_parts = len(parts) 
  72             self.to_screen('%s: found 
%s parts
' % (video_id, len_parts)) 
  75             final_url = self._url_for_id(part_id, quality) 
  76             ext = (final_url.split('?
')[0]).split('.')[-1] 
  82                 'thumbnail
': thumbnail_url, 
  84                     'Referer
': player_url, 
  87             result.append(part_info) 
  90             '_type
': 'multi_video
',