]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youku.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import ExtractorError
12 compat_urllib_request
,
16 class YoukuIE(InfoExtractor
):
21 http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
23 (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
27 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
28 'md5': '5f3af4192eabacc4501508d54a8cabd7',
30 'id': 'XMTc1ODE5Njcy_part1',
31 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
35 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
36 'only_matching': True,
38 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
40 'id': 'XODgxNjg1Mzk2',
45 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
47 'id': 'XMTI1OTczNDM5Mg',
51 'skip': 'Available in China only',
53 'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
54 'note': 'Video protected with password',
56 'id': 'XNjA1NzA2Njgw',
57 'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
61 'videopassword': '100600',
65 def construct_video_urls(self
, data1
, data2
):
71 t
= (t
+ ls
[i
] + compat_ord(s1
[i
% len(s1
)])) % 256
72 ls
[i
], ls
[t
] = ls
[t
], ls
[i
]
75 for i
in range(len(s2
)):
78 ls
[x
], ls
[y
] = ls
[y
], ls
[x
]
79 s
.append(compat_ord(s2
[i
]) ^ ls
[(ls
[x
] + ls
[y
]) % 256])
83 b
'becaf9be', base64
.b64decode(data2
['ep'].encode('ascii'))
84 ).decode('ascii').split('_')
91 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
92 shuffled_string_ls
= []
96 seed
= (seed
* 0xd3 + 0x754f) % 0x10000
97 idx
= seed
* len(string_ls
) // 0x10000
98 shuffled_string_ls
.append(string_ls
[idx
])
102 for format
in data1
['streamtypes']:
104 int(i
) for i
in data1
['streamfileids'][format
].strip('*').split('*')]
106 [shuffled_string_ls
[i
] for i
in streamfileid
])
107 fileid_dict
[format
] = fileid
[:8] + '%s' + fileid
[10:]
109 def get_fileid(format
, n
):
110 fileid
= fileid_dict
[format
] % hex(int(n
))[2:].upper().zfill(2)
114 def generate_ep(format
, n
):
115 fileid
= get_fileid(format
, n
)
118 ('%s_%s_%s' % (sid
, fileid
, token
)).encode('ascii')
120 ep
= base64
.b64encode(ep_t
).decode('ascii')
123 # generate video_urls
125 for format
in data1
['streamtypes']:
127 for dt
in data1
['segs'][format
]:
128 n
= str(int(dt
['no']))
131 'hd': self
.get_hd(format
),
139 'ep': generate_ep(format
, n
)
142 'http://k.youku.com/player/getFlvPath/' + \
144 '_' + str(int(n
) + 1).zfill(2) + \
145 '/st/' + self
.parse_ext_l(format
) + \
146 '/fileid/' + get_fileid(format
, n
) + '?' + \
147 compat_urllib_parse
.urlencode(param
)
148 video_urls
.append(video_url
)
149 video_urls_dict
[format
] = video_urls
151 return video_urls_dict
153 def get_hd(self
, fm
):
162 return hd_id_dict
[fm
]
164 def parse_ext_l(self
, fm
):
175 def get_format_name(self
, fm
):
186 def _real_extract(self
, url
):
187 video_id
= self
._match
_id
(url
)
189 def retrieve_data(req_url
, note
):
190 req
= compat_urllib_request
.Request(req_url
)
192 cn_verification_proxy
= self
._downloader
.params
.get('cn_verification_proxy')
193 if cn_verification_proxy
:
194 req
.add_header('Ytdl-request-proxy', cn_verification_proxy
)
196 raw_data
= self
._download
_json
(req
, video_id
, note
=note
)
197 return raw_data
['data'][0]
199 video_password
= self
._downloader
.params
.get('videopassword', None)
202 basic_data_url
= 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
204 basic_data_url
+= '?password=%s' % video_password
206 data1
= retrieve_data(
208 'Downloading JSON metadata 1')
209 data2
= retrieve_data(
210 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
,
211 'Downloading JSON metadata 2')
213 error_code
= data1
.get('error_code')
215 error
= data1
.get('error')
216 if error
is not None and '因版权原因无法观看此视频' in error
:
217 raise ExtractorError(
218 'Youku said: Sorry, this video is available in China only', expected
=True)
220 msg
= 'Youku server reported error %i' % error_code
221 if error
is not None:
223 raise ExtractorError(msg
)
225 title
= data1
['title']
227 # generate video_urls_dict
228 video_urls_dict
= self
.construct_video_urls(data1
, data2
)
232 'id': '%s_part%d' % (video_id
, i
+ 1),
235 # some formats are not available for all parts, we have to detect
237 } for i
in range(max(len(v
) for v
in data1
['segs'].values()))]
238 for fm
in data1
['streamtypes']:
239 video_urls
= video_urls_dict
[fm
]
240 for video_url
, seg
, entry
in zip(video_urls
, data1
['segs'][fm
], entries
):
241 entry
['formats'].append({
243 'format_id': self
.get_format_name(fm
),
244 'ext': self
.parse_ext_l(fm
),
245 'filesize': int(seg
['size']),
249 '_type': 'multi_video',