]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youku.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import ExtractorError
 
  12     compat_urllib_request
, 
  16 class YoukuIE(InfoExtractor
): 
  21             http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| 
  23         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) 
  27         'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', 
  28         'md5': '5f3af4192eabacc4501508d54a8cabd7', 
  30             'id': 'XMTc1ODE5Njcy_part1', 
  31             'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', 
  35         'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf', 
  36         'only_matching': True, 
  38         'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html', 
  40             'id': 'XODgxNjg1Mzk2', 
  45         'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html', 
  47             'id': 'XMTI1OTczNDM5Mg', 
  51         'skip': 'Available in China only', 
  54     def construct_video_urls(self
, data1
, data2
): 
  60                 t 
= (t 
+ ls
[i
] + compat_ord(s1
[i 
% len(s1
)])) % 256 
  61                 ls
[i
], ls
[t
] = ls
[t
], ls
[i
] 
  64             for i 
in range(len(s2
)): 
  67                 ls
[x
], ls
[y
] = ls
[y
], ls
[x
] 
  68                 s
.append(compat_ord(s2
[i
]) ^ ls
[(ls
[x
] + ls
[y
]) % 256]) 
  72             b
'becaf9be', base64
.b64decode(data2
['ep'].encode('ascii')) 
  73         ).decode('ascii').split('_') 
  80             'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890') 
  81         shuffled_string_ls 
= [] 
  85             seed 
= (seed 
* 0xd3 + 0x754f) % 0x10000 
  86             idx 
= seed 
* len(string_ls
) // 0x10000 
  87             shuffled_string_ls
.append(string_ls
[idx
]) 
  91         for format 
in data1
['streamtypes']: 
  93                 int(i
) for i 
in data1
['streamfileids'][format
].strip('*').split('*')] 
  95                 [shuffled_string_ls
[i
] for i 
in streamfileid
]) 
  96             fileid_dict
[format
] = fileid
[:8] + '%s' + fileid
[10:] 
  98         def get_fileid(format
, n
): 
  99             fileid 
= fileid_dict
[format
] % hex(int(n
))[2:].upper().zfill(2) 
 103         def generate_ep(format
, n
): 
 104             fileid 
= get_fileid(format
, n
) 
 107                 ('%s_%s_%s' % (sid
, fileid
, token
)).encode('ascii') 
 109             ep 
= base64
.b64encode(ep_t
).decode('ascii') 
 112         # generate video_urls 
 114         for format 
in data1
['streamtypes']: 
 116             for dt 
in data1
['segs'][format
]: 
 117                 n 
= str(int(dt
['no'])) 
 120                     'hd': self
.get_hd(format
), 
 128                     'ep': generate_ep(format
, n
) 
 131                     'http://k.youku.com/player/getFlvPath/' + \
 
 133                     '_' + str(int(n
) + 1).zfill(2) + \
 
 134                     '/st/' + self
.parse_ext_l(format
) + \
 
 135                     '/fileid/' + get_fileid(format
, n
) + '?' + \
 
 136                     compat_urllib_parse
.urlencode(param
) 
 137                 video_urls
.append(video_url
) 
 138             video_urls_dict
[format
] = video_urls
 
 140         return video_urls_dict
 
 142     def get_hd(self
, fm
): 
 151         return hd_id_dict
[fm
] 
 153     def parse_ext_l(self
, fm
): 
 164     def get_format_name(self
, fm
): 
 175     def _real_extract(self
, url
): 
 176         video_id 
= self
._match
_id
(url
) 
 178         def retrieve_data(req_url
, note
): 
 179             req 
= compat_urllib_request
.Request(req_url
) 
 181             cn_verification_proxy 
= self
._downloader
.params
.get('cn_verification_proxy') 
 182             if cn_verification_proxy
: 
 183                 req
.add_header('Ytdl-request-proxy', cn_verification_proxy
) 
 185             raw_data 
= self
._download
_json
(req
, video_id
, note
=note
) 
 186             return raw_data
['data'][0] 
 189         data1 
= retrieve_data( 
 190             'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
, 
 191             'Downloading JSON metadata 1') 
 192         data2 
= retrieve_data( 
 193             'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
, 
 194             'Downloading JSON metadata 2') 
 196         error_code 
= data1
.get('error_code') 
 198             error 
= data1
.get('error') 
 199             if error 
is not None and '因版权原因无法观看此视频' in error
: 
 200                 raise ExtractorError( 
 201                     'Youku said: Sorry, this video is available in China only', expected
=True) 
 203                 msg 
= 'Youku server reported error %i' % error_code
 
 204                 if error 
is not None: 
 206                 raise ExtractorError(msg
) 
 208         title 
= data1
['title'] 
 210         # generate video_urls_dict 
 211         video_urls_dict 
= self
.construct_video_urls(data1
, data2
) 
 215             'id': '%s_part%d' % (video_id
, i 
+ 1), 
 218             # some formats are not available for all parts, we have to detect 
 220         } for i 
in range(max(len(v
) for v 
in data1
['segs'].values()))] 
 221         for fm 
in data1
['streamtypes']: 
 222             video_urls 
= video_urls_dict
[fm
] 
 223             for video_url
, seg
, entry 
in zip(video_urls
, data1
['segs'][fm
], entries
): 
 224                 entry
['formats'].append({ 
 226                     'format_id': self
.get_format_name(fm
), 
 227                     'ext': self
.parse_ext_l(fm
), 
 228                     'filesize': int(seg
['size']), 
 232             '_type': 'multi_video',