]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tudou.py
bb8b8e23424e7943f2133028aca187d4fcffeab9
   3 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_str
 
  16 class TudouIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})' 
  20         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', 
  21         'md5': '140a49ed444bd22f93330985d8475fcb', 
  25             'title': '卡马乔国足开大脚长传冲吊集锦', 
  26             'thumbnail': 're:^https?://.*\.jpg$', 
  27             'timestamp': 1372113489000, 
  28             'description': '卡马乔卡家军,开大脚先进战术不完全集锦!', 
  34         'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', 
  38             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', 
  39             'thumbnail': 're:^https?://.*\.jpg$', 
  40             'timestamp': 1349207518000, 
  41             'description': 'md5:294612423894260f2dcd5c6c04fe248b', 
  48     _PLAYER_URL 
= 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' 
  50     # Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf 
  51     # 0001, 0002 and 4001 are not included as they indicate temporary issues 
  53         '0003': 'The video is deleted or does not exist', 
  54         '1001': 'This video is unavailable due to licensing issues', 
  55         '1002': 'This video is unavailable as it\'s under review', 
  56         '1003': 'This video is unavailable as it\'s under review', 
  57         '3001': 'Password required', 
  58         '5001': 'This video is available in Mainland China only due to licensing issues', 
  59         '7001': 'This video is unavailable', 
  60         '8001': 'This video is unavailable due to licensing issues', 
  63     def _url_for_id(self
, video_id
, quality
=None): 
  64         info_url 
= 'http://v2.tudou.com/f?id=' + compat_str(video_id
) 
  66             info_url 
+= '&hd' + quality
 
  67         xml_data 
= self
._download
_xml
(info_url
, video_id
, 'Opening the info XML page') 
  68         error 
= xml_data
.attrib
.get('error') 
  70             raise ExtractorError('Tudou said: %s' % error
, expected
=True) 
  71         final_url 
= xml_data
.text
 
  74     def _real_extract(self
, url
): 
  75         video_id 
= self
._match
_id
(url
) 
  76         item_data 
= self
._download
_json
( 
  77             'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id
, video_id
) 
  79         youku_vcode 
= item_data
.get('vcode') 
  81             return self
.url_result('youku:' + youku_vcode
, ie
='Youku') 
  83         if not item_data
.get('itemSegs'): 
  84             tvc_code 
= item_data
.get('tvcCode') 
  86                 err_msg 
= self
.TVC_ERRORS
.get(tvc_code
) 
  88                     raise ExtractorError('Tudou said: %s' % err_msg
, expected
=True) 
  89                 raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code
) 
  90             raise ExtractorError('Unxpected error returned from Tudou') 
  92         title 
= unescapeHTML(item_data
['kw']) 
  93         description 
= item_data
.get('desc') 
  94         thumbnail_url 
= item_data
.get('pic') 
  95         view_count 
= int_or_none(item_data
.get('playTimes')) 
  96         timestamp 
= int_or_none(item_data
.get('pt')) 
  98         segments 
= self
._parse
_json
(item_data
['itemSegs'], video_id
) 
  99         # It looks like the keys are the arguments that have to be passed as 
 100         # the hd field in the request url, we pick the higher 
 101         # Also, filter non-number qualities (see issue #3643). 
 102         quality 
= sorted(filter(lambda k
: k
.isdigit(), segments
.keys()), 
 103                          key
=lambda k
: int(k
))[-1] 
 104         parts 
= segments
[quality
] 
 105         len_parts 
= len(parts
) 
 107             self
.to_screen('%s: found %s parts' % (video_id
, len_parts
)) 
 109         def part_func(partnum
): 
 110             part 
= parts
[partnum
] 
 112             final_url 
= self
._url
_for
_id
(part_id
, quality
) 
 113             ext 
= (final_url
.split('?')[0]).split('.')[-1] 
 115                 'id': '%s' % part_id
, 
 119                 'thumbnail': thumbnail_url
, 
 120                 'description': description
, 
 121                 'view_count': view_count
, 
 122                 'timestamp': timestamp
, 
 123                 'duration': float_or_none(part
.get('seconds'), 1000), 
 124                 'filesize': int_or_none(part
.get('size')), 
 126                     'Referer': self
._PLAYER
_URL
, 
 130         entries 
= InAdvancePagedList(part_func
, len_parts
, 1) 
 133             '_type': 'multi_video', 
 140 class TudouPlaylistIE(InfoExtractor
): 
 141     IE_NAME 
= 'tudou:playlist' 
 142     _VALID_URL 
= r
'https?://(?:www\.)?tudou\.com/listplay/(?P<id>[\w-]{11})\.html' 
 144         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html', 
 148         'playlist_mincount': 209, 
 151     def _real_extract(self
, url
): 
 152         playlist_id 
= self
._match
_id
(url
) 
 153         playlist_data 
= self
._download
_json
( 
 154             'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id
, playlist_id
) 
 155         entries 
= [self
.url_result( 
 156             'http://www.tudou.com/programs/view/%s' % item
['icode'], 
 157             'Tudou', item
['icode'], 
 158             item
['kw']) for item 
in playlist_data
['items']] 
 159         return self
.playlist_result(entries
, playlist_id
) 
 162 class TudouAlbumIE(InfoExtractor
): 
 163     IE_NAME 
= 'tudou:album' 
 164     _VALID_URL 
= r
'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P<id>[\w-]{11})' 
 166         'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html', 
 170         'playlist_mincount': 45, 
 173     def _real_extract(self
, url
): 
 174         album_id 
= self
._match
_id
(url
) 
 175         album_data 
= self
._download
_json
( 
 176             'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id
, album_id
) 
 177         entries 
= [self
.url_result( 
 178             'http://www.tudou.com/programs/view/%s' % item
['icode'], 
 179             'Tudou', item
['icode'], 
 180             item
['kw']) for item 
in album_data
['items']] 
 181         return self
.playlist_result(entries
, album_id
)