]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youku.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import ExtractorError
 
  12     compat_urllib_request
, 
  16 class YoukuIE(InfoExtractor
): 
  21             http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| 
  23         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) 
  27         'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', 
  28         'md5': '5f3af4192eabacc4501508d54a8cabd7', 
  30             'id': 'XMTc1ODE5Njcy_part1', 
  31             'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', 
  35         'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf', 
  36         'only_matching': True, 
  38         'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html', 
  40             'id': 'XODgxNjg1Mzk2', 
  45         'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html', 
  47             'id': 'XMTI1OTczNDM5Mg', 
  51         'skip': 'Available in China only', 
  53         'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html', 
  54         'note': 'Video protected with password', 
  56             'id': 'XNjA1NzA2Njgw', 
  57             'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起', 
  61             'videopassword': '100600', 
  65     def construct_video_urls(self
, data1
, data2
): 
  71                 t 
= (t 
+ ls
[i
] + compat_ord(s1
[i 
% len(s1
)])) % 256 
  72                 ls
[i
], ls
[t
] = ls
[t
], ls
[i
] 
  75             for i 
in range(len(s2
)): 
  78                 ls
[x
], ls
[y
] = ls
[y
], ls
[x
] 
  79                 s
.append(compat_ord(s2
[i
]) ^ ls
[(ls
[x
] + ls
[y
]) % 256]) 
  83             b
'becaf9be', base64
.b64decode(data2
['ep'].encode('ascii')) 
  84         ).decode('ascii').split('_') 
  91             'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890') 
  92         shuffled_string_ls 
= [] 
  96             seed 
= (seed 
* 0xd3 + 0x754f) % 0x10000 
  97             idx 
= seed 
* len(string_ls
) // 0x10000 
  98             shuffled_string_ls
.append(string_ls
[idx
]) 
 102         for format 
in data1
['streamtypes']: 
 104                 int(i
) for i 
in data1
['streamfileids'][format
].strip('*').split('*')] 
 106                 [shuffled_string_ls
[i
] for i 
in streamfileid
]) 
 107             fileid_dict
[format
] = fileid
[:8] + '%s' + fileid
[10:] 
 109         def get_fileid(format
, n
): 
 110             fileid 
= fileid_dict
[format
] % hex(int(n
))[2:].upper().zfill(2) 
 114         def generate_ep(format
, n
): 
 115             fileid 
= get_fileid(format
, n
) 
 118                 ('%s_%s_%s' % (sid
, fileid
, token
)).encode('ascii') 
 120             ep 
= base64
.b64encode(ep_t
).decode('ascii') 
 123         # generate video_urls 
 125         for format 
in data1
['streamtypes']: 
 127             for dt 
in data1
['segs'][format
]: 
 128                 n 
= str(int(dt
['no'])) 
 131                     'hd': self
.get_hd(format
), 
 139                     'ep': generate_ep(format
, n
) 
 142                     'http://k.youku.com/player/getFlvPath/' + \
 
 144                     '_' + str(int(n
) + 1).zfill(2) + \
 
 145                     '/st/' + self
.parse_ext_l(format
) + \
 
 146                     '/fileid/' + get_fileid(format
, n
) + '?' + \
 
 147                     compat_urllib_parse
.urlencode(param
) 
 148                 video_urls
.append(video_url
) 
 149             video_urls_dict
[format
] = video_urls
 
 151         return video_urls_dict
 
 153     def get_hd(self
, fm
): 
 162         return hd_id_dict
[fm
] 
 164     def parse_ext_l(self
, fm
): 
 175     def get_format_name(self
, fm
): 
 186     def _real_extract(self
, url
): 
 187         video_id 
= self
._match
_id
(url
) 
 189         def retrieve_data(req_url
, note
): 
 190             req 
= compat_urllib_request
.Request(req_url
) 
 192             cn_verification_proxy 
= self
._downloader
.params
.get('cn_verification_proxy') 
 193             if cn_verification_proxy
: 
 194                 req
.add_header('Ytdl-request-proxy', cn_verification_proxy
) 
 196             raw_data 
= self
._download
_json
(req
, video_id
, note
=note
) 
 197             return raw_data
['data'][0] 
 199         video_password 
= self
._downloader
.params
.get('videopassword', None) 
 202         basic_data_url 
= 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
 
 204             basic_data_url 
+= '?password=%s' % video_password
 
 206         data1 
= retrieve_data( 
 208             'Downloading JSON metadata 1') 
 209         data2 
= retrieve_data( 
 210             'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
, 
 211             'Downloading JSON metadata 2') 
 213         error_code 
= data1
.get('error_code') 
 215             error 
= data1
.get('error') 
 216             if error 
is not None and '因版权原因无法观看此视频' in error
: 
 217                 raise ExtractorError( 
 218                     'Youku said: Sorry, this video is available in China only', expected
=True) 
 220                 msg 
= 'Youku server reported error %i' % error_code
 
 221                 if error 
is not None: 
 223                 raise ExtractorError(msg
) 
 225         title 
= data1
['title'] 
 227         # generate video_urls_dict 
 228         video_urls_dict 
= self
.construct_video_urls(data1
, data2
) 
 232             'id': '%s_part%d' % (video_id
, i 
+ 1), 
 235             # some formats are not available for all parts, we have to detect 
 237         } for i 
in range(max(len(v
) for v 
in data1
['segs'].values()))] 
 238         for fm 
in data1
['streamtypes']: 
 239             video_urls 
= video_urls_dict
[fm
] 
 240             for video_url
, seg
, entry 
in zip(video_urls
, data1
['segs'][fm
], entries
): 
 241                 entry
['formats'].append({ 
 243                     'format_id': self
.get_format_name(fm
), 
 244                     'ext': self
.parse_ext_l(fm
), 
 245                     'filesize': int(seg
['size']), 
 249             '_type': 'multi_video',