]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youku.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import ExtractorError
12 compat_urllib_request
,
16 class YoukuIE(InfoExtractor
):
21 http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
23 (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
27 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
28 'md5': '5f3af4192eabacc4501508d54a8cabd7',
30 'id': 'XMTc1ODE5Njcy_part1',
31 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
35 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
36 'only_matching': True,
38 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
40 'id': 'XODgxNjg1Mzk2',
45 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
47 'id': 'XMTI1OTczNDM5Mg',
51 'skip': 'Available in China only',
54 def construct_video_urls(self
, data1
, data2
):
60 t
= (t
+ ls
[i
] + compat_ord(s1
[i
% len(s1
)])) % 256
61 ls
[i
], ls
[t
] = ls
[t
], ls
[i
]
64 for i
in range(len(s2
)):
67 ls
[x
], ls
[y
] = ls
[y
], ls
[x
]
68 s
.append(compat_ord(s2
[i
]) ^ ls
[(ls
[x
] + ls
[y
]) % 256])
72 b
'becaf9be', base64
.b64decode(data2
['ep'].encode('ascii'))
73 ).decode('ascii').split('_')
80 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
81 shuffled_string_ls
= []
85 seed
= (seed
* 0xd3 + 0x754f) % 0x10000
86 idx
= seed
* len(string_ls
) // 0x10000
87 shuffled_string_ls
.append(string_ls
[idx
])
91 for format
in data1
['streamtypes']:
93 int(i
) for i
in data1
['streamfileids'][format
].strip('*').split('*')]
95 [shuffled_string_ls
[i
] for i
in streamfileid
])
96 fileid_dict
[format
] = fileid
[:8] + '%s' + fileid
[10:]
98 def get_fileid(format
, n
):
99 fileid
= fileid_dict
[format
] % hex(int(n
))[2:].upper().zfill(2)
103 def generate_ep(format
, n
):
104 fileid
= get_fileid(format
, n
)
107 ('%s_%s_%s' % (sid
, fileid
, token
)).encode('ascii')
109 ep
= base64
.b64encode(ep_t
).decode('ascii')
112 # generate video_urls
114 for format
in data1
['streamtypes']:
116 for dt
in data1
['segs'][format
]:
117 n
= str(int(dt
['no']))
120 'hd': self
.get_hd(format
),
128 'ep': generate_ep(format
, n
)
131 'http://k.youku.com/player/getFlvPath/' + \
133 '_' + str(int(n
) + 1).zfill(2) + \
134 '/st/' + self
.parse_ext_l(format
) + \
135 '/fileid/' + get_fileid(format
, n
) + '?' + \
136 compat_urllib_parse
.urlencode(param
)
137 video_urls
.append(video_url
)
138 video_urls_dict
[format
] = video_urls
140 return video_urls_dict
142 def get_hd(self
, fm
):
151 return hd_id_dict
[fm
]
153 def parse_ext_l(self
, fm
):
164 def get_format_name(self
, fm
):
175 def _real_extract(self
, url
):
176 video_id
= self
._match
_id
(url
)
178 def retrieve_data(req_url
, note
):
179 req
= compat_urllib_request
.Request(req_url
)
181 cn_verification_proxy
= self
._downloader
.params
.get('cn_verification_proxy')
182 if cn_verification_proxy
:
183 req
.add_header('Ytdl-request-proxy', cn_verification_proxy
)
185 raw_data
= self
._download
_json
(req
, video_id
, note
=note
)
186 return raw_data
['data'][0]
189 data1
= retrieve_data(
190 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
,
191 'Downloading JSON metadata 1')
192 data2
= retrieve_data(
193 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
,
194 'Downloading JSON metadata 2')
196 error_code
= data1
.get('error_code')
198 error
= data1
.get('error')
199 if error
is not None and '因版权原因无法观看此视频' in error
:
200 raise ExtractorError(
201 'Youku said: Sorry, this video is available in China only', expected
=True)
203 msg
= 'Youku server reported error %i' % error_code
204 if error
is not None:
206 raise ExtractorError(msg
)
208 title
= data1
['title']
210 # generate video_urls_dict
211 video_urls_dict
= self
.construct_video_urls(data1
, data2
)
215 'id': '%s_part%d' % (video_id
, i
+ 1),
218 # some formats are not available for all parts, we have to detect
220 } for i
in range(max(len(v
) for v
in data1
['segs'].values()))]
221 for fm
in data1
['streamtypes']:
222 video_urls
= video_urls_dict
[fm
]
223 for video_url
, seg
, entry
in zip(video_urls
, data1
['segs'][fm
], entries
):
224 entry
['formats'].append({
226 'format_id': self
.get_format_name(fm
),
227 'ext': self
.parse_ext_l(fm
),
228 'filesize': int(seg
['size']),
232 '_type': 'multi_video',