]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youku.py
69ecc837a4d6d94c82a1055cb7cc41d6e6663763
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  17 class YoukuIE(InfoExtractor
): 
  22             http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| 
  24         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) 
  28         'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', 
  29         'md5': '5f3af4192eabacc4501508d54a8cabd7', 
  31             'id': 'XMTc1ODE5Njcy_part1', 
  32             'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', 
  36         'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf', 
  37         'only_matching': True, 
  39         'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html', 
  41             'id': 'XODgxNjg1Mzk2', 
  46         'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html', 
  48             'id': 'XMTI1OTczNDM5Mg', 
  52         'skip': 'Available in China only', 
  54         'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html', 
  55         'note': 'Video protected with password', 
  57             'id': 'XNjA1NzA2Njgw', 
  58             'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起', 
  62             'videopassword': '100600', 
  66     def construct_video_urls(self
, data1
, data2
): 
  72                 t 
= (t 
+ ls
[i
] + compat_ord(s1
[i 
% len(s1
)])) % 256 
  73                 ls
[i
], ls
[t
] = ls
[t
], ls
[i
] 
  76             for i 
in range(len(s2
)): 
  79                 ls
[x
], ls
[y
] = ls
[y
], ls
[x
] 
  80                 s
.append(compat_ord(s2
[i
]) ^ ls
[(ls
[x
] + ls
[y
]) % 256]) 
  84             b
'becaf9be', base64
.b64decode(data2
['ep'].encode('ascii')) 
  85         ).decode('ascii').split('_') 
  92             'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890') 
  93         shuffled_string_ls 
= [] 
  97             seed 
= (seed 
* 0xd3 + 0x754f) % 0x10000 
  98             idx 
= seed 
* len(string_ls
) // 0x10000 
  99             shuffled_string_ls
.append(string_ls
[idx
]) 
 103         for format 
in data1
['streamtypes']: 
 105                 int(i
) for i 
in data1
['streamfileids'][format
].strip('*').split('*')] 
 107                 [shuffled_string_ls
[i
] for i 
in streamfileid
]) 
 108             fileid_dict
[format
] = fileid
[:8] + '%s' + fileid
[10:] 
 110         def get_fileid(format
, n
): 
 111             fileid 
= fileid_dict
[format
] % hex(int(n
))[2:].upper().zfill(2) 
 115         def generate_ep(format
, n
): 
 116             fileid 
= get_fileid(format
, n
) 
 119                 ('%s_%s_%s' % (sid
, fileid
, token
)).encode('ascii') 
 121             ep 
= base64
.b64encode(ep_t
).decode('ascii') 
 124         # generate video_urls 
 126         for format 
in data1
['streamtypes']: 
 128             for dt 
in data1
['segs'][format
]: 
 129                 n 
= str(int(dt
['no'])) 
 132                     'hd': self
.get_hd(format
), 
 140                     'ep': generate_ep(format
, n
) 
 143                     'http://k.youku.com/player/getFlvPath/' + \
 
 145                     '_' + str(int(n
) + 1).zfill(2) + \
 
 146                     '/st/' + self
.parse_ext_l(format
) + \
 
 147                     '/fileid/' + get_fileid(format
, n
) + '?' + \
 
 148                     compat_urllib_parse
.urlencode(param
) 
 149                 video_urls
.append(video_url
) 
 150             video_urls_dict
[format
] = video_urls
 
 152         return video_urls_dict
 
 154     def get_hd(self
, fm
): 
 163         return hd_id_dict
[fm
] 
 165     def parse_ext_l(self
, fm
): 
 176     def get_format_name(self
, fm
): 
 187     def _real_extract(self
, url
): 
 188         video_id 
= self
._match
_id
(url
) 
 190         def retrieve_data(req_url
, note
): 
 191             req 
= sanitized_Request(req_url
) 
 193             cn_verification_proxy 
= self
._downloader
.params
.get('cn_verification_proxy') 
 194             if cn_verification_proxy
: 
 195                 req
.add_header('Ytdl-request-proxy', cn_verification_proxy
) 
 197             raw_data 
= self
._download
_json
(req
, video_id
, note
=note
) 
 198             return raw_data
['data'][0] 
 200         video_password 
= self
._downloader
.params
.get('videopassword', None) 
 203         basic_data_url 
= 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
 
 205             basic_data_url 
+= '?password=%s' % video_password
 
 207         data1 
= retrieve_data( 
 209             'Downloading JSON metadata 1') 
 210         data2 
= retrieve_data( 
 211             'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
, 
 212             'Downloading JSON metadata 2') 
 214         error_code 
= data1
.get('error_code') 
 216             error 
= data1
.get('error') 
 217             if error 
is not None and '因版权原因无法观看此视频' in error
: 
 218                 raise ExtractorError( 
 219                     'Youku said: Sorry, this video is available in China only', expected
=True) 
 221                 msg 
= 'Youku server reported error %i' % error_code
 
 222                 if error 
is not None: 
 224                 raise ExtractorError(msg
) 
 226         title 
= data1
['title'] 
 228         # generate video_urls_dict 
 229         video_urls_dict 
= self
.construct_video_urls(data1
, data2
) 
 233             'id': '%s_part%d' % (video_id
, i 
+ 1), 
 236             # some formats are not available for all parts, we have to detect 
 238         } for i 
in range(max(len(v
) for v 
in data1
['segs'].values()))] 
 239         for fm 
in data1
['streamtypes']: 
 240             video_urls 
= video_urls_dict
[fm
] 
 241             for video_url
, seg
, entry 
in zip(video_urls
, data1
['segs'][fm
], entries
): 
 242                 entry
['formats'].append({ 
 244                     'format_id': self
.get_format_name(fm
), 
 245                     'ext': self
.parse_ext_l(fm
), 
 246                     'filesize': int(seg
['size']), 
 250             '_type': 'multi_video',