]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tudou.py
   3 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_str
 
  14 class TudouIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})' 
  18         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', 
  19         'md5': '140a49ed444bd22f93330985d8475fcb', 
  23             'title': '卡马乔国足开大脚长传冲吊集锦', 
  24             'thumbnail': 're:^https?://.*\.jpg$', 
  25             'timestamp': 1372113489000, 
  26             'description': '卡马乔卡家军,开大脚先进战术不完全集锦!', 
  32         'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', 
  36             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', 
  37             'thumbnail': 're:^https?://.*\.jpg$', 
  38             'timestamp': 1349207518000, 
  39             'description': 'md5:294612423894260f2dcd5c6c04fe248b', 
  46     _PLAYER_URL 
= 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' 
  48     def _url_for_id(self
, video_id
, quality
=None): 
  49         info_url 
= 'http://v2.tudou.com/f?id=' + compat_str(video_id
) 
  51             info_url 
+= '&hd' + quality
 
  52         xml_data 
= self
._download
_xml
(info_url
, video_id
, 'Opening the info XML page') 
  53         final_url 
= xml_data
.text
 
  56     def _real_extract(self
, url
): 
  57         video_id 
= self
._match
_id
(url
) 
  58         item_data 
= self
._download
_json
( 
  59             'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id
, video_id
) 
  61         youku_vcode 
= item_data
.get('vcode') 
  63             return self
.url_result('youku:' + youku_vcode
, ie
='Youku') 
  65         title 
= unescapeHTML(item_data
['kw']) 
  66         description 
= item_data
.get('desc') 
  67         thumbnail_url 
= item_data
.get('pic') 
  68         view_count 
= int_or_none(item_data
.get('playTimes')) 
  69         timestamp 
= int_or_none(item_data
.get('pt')) 
  71         segments 
= self
._parse
_json
(item_data
['itemSegs'], video_id
) 
  72         # It looks like the keys are the arguments that have to be passed as 
  73         # the hd field in the request url, we pick the higher 
  74         # Also, filter non-number qualities (see issue #3643). 
  75         quality 
= sorted(filter(lambda k
: k
.isdigit(), segments
.keys()), 
  76                          key
=lambda k
: int(k
))[-1] 
  77         parts 
= segments
[quality
] 
  79         len_parts 
= len(parts
) 
  81             self
.to_screen('%s: found %s parts' % (video_id
, len_parts
)) 
  84             final_url 
= self
._url
_for
_id
(part_id
, quality
) 
  85             ext 
= (final_url
.split('?')[0]).split('.')[-1] 
  91                 'thumbnail': thumbnail_url
, 
  92                 'description': description
, 
  93                 'view_count': view_count
, 
  94                 'timestamp': timestamp
, 
  95                 'duration': float_or_none(part
.get('seconds'), 1000), 
  96                 'filesize': int_or_none(part
.get('size')), 
  98                     'Referer': self
._PLAYER
_URL
, 
 101             result
.append(part_info
) 
 104             '_type': 'multi_video', 
 111 class TudouPlaylistIE(InfoExtractor
): 
 112     IE_NAME 
= 'tudou:playlist' 
 113     _VALID_URL 
= r
'https?://(?:www\.)?tudou\.com/listplay/(?P<id>[\w-]{11})\.html' 
 115         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html', 
 119         'playlist_mincount': 209, 
 122     def _real_extract(self
, url
): 
 123         playlist_id 
= self
._match
_id
(url
) 
 124         playlist_data 
= self
._download
_json
( 
 125             'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id
, playlist_id
) 
 126         entries 
= [self
.url_result( 
 127             'http://www.tudou.com/programs/view/%s' % item
['icode'], 
 128             'Tudou', item
['icode'], 
 129             item
['kw']) for item 
in playlist_data
['items']] 
 130         return self
.playlist_result(entries
, playlist_id
) 
 133 class TudouAlbumIE(InfoExtractor
): 
 134     IE_NAME 
= 'tudou:album' 
 135     _VALID_URL 
= r
'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P<id>[\w-]{11})' 
 137         'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html', 
 141         'playlist_mincount': 45, 
 144     def _real_extract(self
, url
): 
 145         album_id 
= self
._match
_id
(url
) 
 146         album_data 
= self
._download
_json
( 
 147             'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id
, album_id
) 
 148         entries 
= [self
.url_result( 
 149             'http://www.tudou.com/programs/view/%s' % item
['icode'], 
 150             'Tudou', item
['icode'], 
 151             item
['kw']) for item 
in album_data
['items']] 
 152         return self
.playlist_result(entries
, album_id
)