]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youku.py
69ecc837a4d6d94c82a1055cb7cc41d6e6663763
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
17 class YoukuIE(InfoExtractor
):
22 http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
24 (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
28 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
29 'md5': '5f3af4192eabacc4501508d54a8cabd7',
31 'id': 'XMTc1ODE5Njcy_part1',
32 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
36 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
37 'only_matching': True,
39 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
41 'id': 'XODgxNjg1Mzk2',
46 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
48 'id': 'XMTI1OTczNDM5Mg',
52 'skip': 'Available in China only',
54 'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
55 'note': 'Video protected with password',
57 'id': 'XNjA1NzA2Njgw',
58 'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
62 'videopassword': '100600',
66 def construct_video_urls(self
, data1
, data2
):
72 t
= (t
+ ls
[i
] + compat_ord(s1
[i
% len(s1
)])) % 256
73 ls
[i
], ls
[t
] = ls
[t
], ls
[i
]
76 for i
in range(len(s2
)):
79 ls
[x
], ls
[y
] = ls
[y
], ls
[x
]
80 s
.append(compat_ord(s2
[i
]) ^ ls
[(ls
[x
] + ls
[y
]) % 256])
84 b
'becaf9be', base64
.b64decode(data2
['ep'].encode('ascii'))
85 ).decode('ascii').split('_')
92 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
93 shuffled_string_ls
= []
97 seed
= (seed
* 0xd3 + 0x754f) % 0x10000
98 idx
= seed
* len(string_ls
) // 0x10000
99 shuffled_string_ls
.append(string_ls
[idx
])
103 for format
in data1
['streamtypes']:
105 int(i
) for i
in data1
['streamfileids'][format
].strip('*').split('*')]
107 [shuffled_string_ls
[i
] for i
in streamfileid
])
108 fileid_dict
[format
] = fileid
[:8] + '%s' + fileid
[10:]
110 def get_fileid(format
, n
):
111 fileid
= fileid_dict
[format
] % hex(int(n
))[2:].upper().zfill(2)
115 def generate_ep(format
, n
):
116 fileid
= get_fileid(format
, n
)
119 ('%s_%s_%s' % (sid
, fileid
, token
)).encode('ascii')
121 ep
= base64
.b64encode(ep_t
).decode('ascii')
124 # generate video_urls
126 for format
in data1
['streamtypes']:
128 for dt
in data1
['segs'][format
]:
129 n
= str(int(dt
['no']))
132 'hd': self
.get_hd(format
),
140 'ep': generate_ep(format
, n
)
143 'http://k.youku.com/player/getFlvPath/' + \
145 '_' + str(int(n
) + 1).zfill(2) + \
146 '/st/' + self
.parse_ext_l(format
) + \
147 '/fileid/' + get_fileid(format
, n
) + '?' + \
148 compat_urllib_parse
.urlencode(param
)
149 video_urls
.append(video_url
)
150 video_urls_dict
[format
] = video_urls
152 return video_urls_dict
154 def get_hd(self
, fm
):
163 return hd_id_dict
[fm
]
165 def parse_ext_l(self
, fm
):
176 def get_format_name(self
, fm
):
187 def _real_extract(self
, url
):
188 video_id
= self
._match
_id
(url
)
190 def retrieve_data(req_url
, note
):
191 req
= sanitized_Request(req_url
)
193 cn_verification_proxy
= self
._downloader
.params
.get('cn_verification_proxy')
194 if cn_verification_proxy
:
195 req
.add_header('Ytdl-request-proxy', cn_verification_proxy
)
197 raw_data
= self
._download
_json
(req
, video_id
, note
=note
)
198 return raw_data
['data'][0]
200 video_password
= self
._downloader
.params
.get('videopassword', None)
203 basic_data_url
= 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
205 basic_data_url
+= '?password=%s' % video_password
207 data1
= retrieve_data(
209 'Downloading JSON metadata 1')
210 data2
= retrieve_data(
211 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
,
212 'Downloading JSON metadata 2')
214 error_code
= data1
.get('error_code')
216 error
= data1
.get('error')
217 if error
is not None and '因版权原因无法观看此视频' in error
:
218 raise ExtractorError(
219 'Youku said: Sorry, this video is available in China only', expected
=True)
221 msg
= 'Youku server reported error %i' % error_code
222 if error
is not None:
224 raise ExtractorError(msg
)
226 title
= data1
['title']
228 # generate video_urls_dict
229 video_urls_dict
= self
.construct_video_urls(data1
, data2
)
233 'id': '%s_part%d' % (video_id
, i
+ 1),
236 # some formats are not available for all parts, we have to detect
238 } for i
in range(max(len(v
) for v
in data1
['segs'].values()))]
239 for fm
in data1
['streamtypes']:
240 video_urls
= video_urls_dict
[fm
]
241 for video_url
, seg
, entry
in zip(video_urls
, data1
['segs'][fm
], entries
):
242 entry
['formats'].append({
244 'format_id': self
.get_format_name(fm
),
245 'ext': self
.parse_ext_l(fm
),
246 'filesize': int(seg
['size']),
250 '_type': 'multi_video',