2 from __future__
import unicode_literals
13 from .common
import InfoExtractor
14 from ..compat
import (
18 compat_urllib_parse_urlparse
,
31 return hashlib
.md5(text
.encode('utf-8')).hexdigest()
34 class IqiyiSDK(object):
35 def __init__(self
, target
, ip
, timestamp
):
38 self
.timestamp
= timestamp
42 return compat_str(sum(map(lambda p
: int(p
, 16), list(data
))))
46 if isinstance(num
, int):
48 return compat_str(sum(map(int, num
)))
51 even
= self
.digit_sum(compat_str(self
.timestamp
)[::2])
52 odd
= self
.digit_sum(compat_str(self
.timestamp
)[1::2])
55 def preprocess(self
, chunksize
):
56 self
.target
= md5_text(self
.target
)
58 for i
in range(32 // chunksize
):
59 chunks
.append(self
.target
[chunksize
* i
:chunksize
* (i
+ 1)])
61 chunks
.append(self
.target
[32 - 32 % chunksize
:])
62 return chunks
, list(map(int, self
.ip
.split('.')))
def mod(self, modulus):
    """Rebuild ``self.target`` from its first chunk and the reduced IP parts.

    Calls ``self.preprocess(32)``, which yields the chunk list and the IP
    address parsed into integers, then sets ``self.target`` to the first
    chunk followed by every IP component taken modulo *modulus*. Each
    remainder is rendered with ``compat_str`` and the pieces are
    concatenated without a separator.

    :param modulus: integer divisor applied to each IP component; a value
        of 0 raises ``ZeroDivisionError`` from the ``%`` operator.
    :returns: None; the result is stored in ``self.target``.
    """
    chunks, ip = self.preprocess(32)
    reduced_ip = ''.join(compat_str(part % modulus) for part in ip)
    self.target = chunks[0] + reduced_ip
68 def split(self
, chunksize
):
75 chunks
, ip
= self
.preprocess(chunksize
)
77 for i
in range(len(chunks
)):
78 ip_part
= compat_str(ip
[i
] % modulus_map
[chunksize
]) if i
< 4 else ''
80 ret
+= ip_part
+ chunks
[i
]
82 ret
+= chunks
[i
] + ip_part
def handle_input16(self):
    """Hash ``self.target`` and wrap the digest with two half-digest sums.

    ``self.target`` is first replaced by ``md5_text(self.target)``. The
    final value is ``split_sum(first 16 chars) + digest +
    split_sum(remaining chars)``.

    :returns: None; the result is stored in ``self.target``.
    """
    digest = md5_text(self.target)
    # Store the digest before calling split_sum, exactly as the original
    # statement order did, so the intermediate state is unchanged.
    self.target = digest
    self.target = (
        self.split_sum(digest[:16]) + digest + self.split_sum(digest[16:]))
89 def handle_input8(self
):
90 self
.target
= md5_text(self
.target
)
93 part
= self
.target
[8 * i
:8 * (i
+ 1)]
94 ret
+= self
.split_sum(part
) + part
98 self
.target
= md5_text(self
.target
)
99 self
.target
= self
.split_sum(self
.target
) + self
.target
def date(self, scheme):
    """Hash ``self.target`` and append date fields chosen by *scheme*.

    ``self.target`` is replaced by ``md5_text(self.target)``; then, for
    every character of *scheme*, the matching field of
    ``time.localtime(self.timestamp)`` is appended:

    * ``'y'`` - the year via ``compat_str``
    * ``'m'`` - the month, zero-padded to two digits
    * ``'d'`` - the day of month, zero-padded to two digits

    :param scheme: iterable of the characters ``'y'``, ``'m'``, ``'d'``
        (the dispatcher in this file passes three of them); any other
        character raises ``KeyError``.
    :returns: None; the result is stored in ``self.target``.
    """
    self.target = md5_text(self.target)
    d = time.localtime(self.timestamp)
    strings = {
        'y': compat_str(d.tm_year),
        'm': '%02d' % d.tm_mon,
        'd': '%02d' % d.tm_mday,
    }
    self.target += ''.join(strings[c] for c in scheme)
def split_time_even_odd(self):
    """Set ``self.target`` to ``odd + md5_text(self.target) + even``.

    ``even`` and ``odd`` are the two values unpacked from
    ``self.even_odd()``.

    NOTE(review): despite the method name, the *odd* value is placed
    first. This is kept as-is; presumably it mirrors the remote SDK's
    naming, so confirm before "fixing" it.

    :returns: None; the result is stored in ``self.target``.
    """
    even, odd = self.even_odd()
    digest = md5_text(self.target)
    self.target = odd + digest + even
def split_time_odd_even(self):
    """Set ``self.target`` to ``even + md5_text(self.target) + odd``.

    ``even`` and ``odd`` are the two values unpacked from
    ``self.even_odd()``.

    NOTE(review): despite the method name, the *even* value is placed
    first. This is kept as-is; presumably it mirrors the remote SDK's
    naming, so confirm before "fixing" it.

    :returns: None; the result is stored in ``self.target``.
    """
    even, odd = self.even_odd()
    digest = md5_text(self.target)
    self.target = even + digest + odd
def split_ip_time_sum(self):
    """Set ``self.target`` to IP sum + first chunk + timestamp digit sum.

    Uses ``self.preprocess(32)`` to obtain the chunk list and the IP
    components as integers. The new target is the concatenation of
    ``compat_str(sum(ip))``, the first chunk, and
    ``self.digit_sum(self.timestamp)``.

    :returns: None; the result is stored in ``self.target``.
    """
    chunks, ip = self.preprocess(32)
    ip_sum = compat_str(sum(ip))
    time_sum = self.digit_sum(self.timestamp)
    self.target = ip_sum + chunks[0] + time_sum
def split_time_ip_sum(self):
    """Set ``self.target`` to timestamp digit sum + first chunk + IP sum.

    Uses ``self.preprocess(32)`` to obtain the chunk list and the IP
    components as integers. The new target is the concatenation of
    ``self.digit_sum(self.timestamp)``, the first chunk, and
    ``compat_str(sum(ip))``.

    :returns: None; the result is stored in ``self.target``.
    """
    chunks, ip = self.preprocess(32)
    time_sum = self.digit_sum(self.timestamp)
    ip_sum = compat_str(sum(ip))
    self.target = time_sum + chunks[0] + ip_sum
class IqiyiSDKInterpreter(object):
    """Holds a piece of SDK source text for later decoding and execution."""

    # Digit alphabet for base-62 conversion: 0-9, then a-z, then A-Z.
    BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def __init__(self, sdk_code):
        """Remember the raw SDK code.

        :param sdk_code: SDK source text, stored unchanged on
            ``self.sdk_code``.
        """
        self.sdk_code = sdk_code
135 def base62(cls
, num
):
140 ret
= cls
.BASE62_TABLE
[num
% 62] + ret
144 def decode_eval_codes(self
):
145 self
.sdk_code
= self
.sdk_code
[5:-3]
148 r
"'([^']+)',62,(\d+),'([^']+)'\.split\('\|'\),[^,]+,{}",
150 obfucasted_code
, count
, symbols
= mobj
.groups()
152 symbols
= symbols
.split('|')
157 b62count
= self
.base62(count
)
158 symbol_table
[b62count
] = symbols
[count
] or b62count
160 self
.sdk_code
= re
.sub(
161 r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)],
164 def run(self
, target
, ip
, timestamp
):
165 self
.decode_eval_codes()
167 functions
= re
.findall(r
'input=([a-zA-Z0-9]+)\(input', self
.sdk_code
)
169 sdk
= IqiyiSDK(target
, ip
, timestamp
)
172 'handleSum': sdk
.handleSum
,
173 'handleInput8': sdk
.handle_input8
,
174 'handleInput16': sdk
.handle_input16
,
175 'splitTimeEvenOdd': sdk
.split_time_even_odd
,
176 'splitTimeOddEven': sdk
.split_time_odd_even
,
177 'splitIpTimeSum': sdk
.split_ip_time_sum
,
178 'splitTimeIpSum': sdk
.split_time_ip_sum
,
180 for function
in functions
:
181 if re
.match(r
'mod\d+', function
):
182 sdk
.mod(int(function
[3:]))
183 elif re
.match(r
'date[ymd]{3}', function
):
184 sdk
.date(function
[4:])
185 elif re
.match(r
'split\d+', function
):
186 sdk
.split(int(function
[5:]))
187 elif function
in other_functions
:
188 other_functions
[function
]()
190 raise ExtractorError('Unknown funcion %s' % function
)
195 class IqiyiIE(InfoExtractor
):
199 _VALID_URL
= r
'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'
201 _NETRC_MACHINE
= 'iqiyi'
204 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
205 'md5': '2cb594dc2781e6c941a110d8f358118b',
207 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
208 'title': '美国德州空中惊现奇异云团 酷似UFO',
212 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
214 'id': 'e3f585b550a280af23c98b6cb2be19fb',
215 'title': '名侦探柯南第752集',
219 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
221 'title': '名侦探柯南第752集',
225 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
227 'title': '名侦探柯南第752集',
231 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
233 'title': '名侦探柯南第752集',
237 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
239 'title': '名侦探柯南第752集',
243 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
245 'title': '名侦探柯南第752集',
249 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
251 'title': '名侦探柯南第752集',
255 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
257 'title': '名侦探柯南第752集',
261 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
263 'title': '名侦探柯南第752集',
267 'skip_download': True,
270 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
271 'only_matching': True,
273 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
274 'only_matching': True,
276 'url': 'http://yule.iqiyi.com/pcb.html',
277 'only_matching': True,
279 # VIP-only video. The first 2 parts (6 minutes) are available without login
280 # MD5 sums omitted as values are different on Travis CI and my machine
281 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
283 'id': 'f3cf468b39dddb30d676f89a91200dc1',
288 'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
294 'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
299 'expected_warnings': ['Needs a VIP account for full video'],
301 'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
306 'playlist_count': 101,
318 def _real_initialize(self
):
323 # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
324 N
= 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
327 return ohdave_rsa_encrypt(data
, e
, N
)
330 (username
, password
) = self
._get
_login
_info
()
332 # No authentication to be performed
336 data
= self
._download
_json
(
337 'http://kylin.iqiyi.com/get_token', None,
338 note
='Get token for logging', errnote
='Unable to get token for logging')
340 timestamp
= int(time
.time())
341 target
= '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
342 username
, self
._rsa
_fun
(password
.encode('utf-8')))
344 interp
= IqiyiSDKInterpreter(sdk
)
345 sign
= interp
.run(target
, data
['ip'], timestamp
)
347 validation_params
= {
349 'server': 'BEA3AA1908656AABCCFF76582C4C6660',
350 'token': data
['token'],
351 'bird_src': 'f8d91d57af224da7893dd397d52d811a',
355 validation_result
= self
._download
_json
(
356 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse
.urlencode(validation_params
), None,
357 note
='Validate credentials', errnote
='Unable to validate credentials')
360 'P00107': 'please login via the web interface and enter the CAPTCHA code',
361 'P00117': 'bad username or password',
364 code
= validation_result
['code']
366 msg
= MSG_MAP
.get(code
)
368 msg
= 'error %s' % code
369 if validation_result
.get('msg'):
370 msg
+= ': ' + validation_result
['msg']
371 self
._downloader
.report_warning('unable to log in: ' + msg
)
376 def _authenticate_vip_video(self
, api_video_url
, video_id
, tvid
, _uuid
, do_report_warning
):
378 # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
380 'platform': 'b6c13e26323c537d',
385 'playType': 'main', # XXX: always main?
386 'filename': os
.path
.splitext(url_basename(api_video_url
))[0],
389 qd_items
= compat_parse_qs(compat_urllib_parse_urlparse(api_video_url
).query
)
390 for key
, val
in qd_items
.items():
391 auth_params
[key
] = val
[0]
393 auth_req
= sanitized_Request(
394 'http://api.vip.iqiyi.com/services/ckn.action',
395 urlencode_postdata(auth_params
))
396 # iQiyi server throws HTTP 405 error without the following header
397 auth_req
.add_header('Content-Type', 'application/x-www-form-urlencoded')
398 auth_result
= self
._download
_json
(
400 note
='Downloading video authentication JSON',
401 errnote
='Unable to download video authentication JSON')
402 if auth_result
['code'] == 'Q00506': # requires a VIP account
403 if do_report_warning
:
404 self
.report_warning('Needs a VIP account for full video')
409 def construct_video_urls(self
, data
, video_id
, _uuid
, tvid
):
418 def get_encode_code(l
):
423 for i
in range(c
- 1, -1, -1):
424 a
= do_xor(int(b
[c
- i
- 1], 16), i
)
428 def get_path_key(x
, format_id
, segment_index
):
429 mg
= ')(*&^flash@#$%a'
430 tm
= self
._download
_json
(
431 'http://data.video.qiyi.com/t?tn=' + str(random
.random()), video_id
,
432 note
='Download path key of segment %d for format %s' % (segment_index
+ 1, format_id
)
434 t
= str(int(math
.floor(int(tm
) / (600.0))))
435 return md5_text(t
+ mg
+ x
)
438 need_vip_warning_report
= True
439 for format_item
in data
['vp']['tkl'][0]['vs']:
440 if 0 < int(format_item
['bid']) <= 10:
441 format_id
= self
.get_format(format_item
['bid'])
447 video_urls_info
= format_item
['fs']
448 if not format_item
['fs'][0]['l'].startswith('/'):
449 t
= get_encode_code(format_item
['fs'][0]['l'])
450 if t
.endswith('mp4'):
451 video_urls_info
= format_item
['flvs']
453 for segment_index
, segment
in enumerate(video_urls_info
):
455 if not vl
.startswith('/'):
456 vl
= get_encode_code(vl
)
457 is_vip_video
= '/vip/' in vl
458 filesize
= segment
['b']
459 base_url
= data
['vp']['du'].split('/')
462 vl
.split('/')[-1].split('.')[0], format_id
, segment_index
)
463 base_url
.insert(-1, key
)
464 base_url
= '/'.join(base_url
)
467 'qyid': uuid
.uuid4().hex,
472 'tn': str(int(time
.time()))
474 api_video_url
= base_url
+ vl
476 api_video_url
= api_video_url
.replace('.f4v', '.hml')
477 auth_result
= self
._authenticate
_vip
_video
(
478 api_video_url
, video_id
, tvid
, _uuid
, need_vip_warning_report
)
479 if auth_result
is False:
480 need_vip_warning_report
= False
483 't': auth_result
['data']['t'],
484 # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
485 'cid': 'afbe8fd3d73448c9',
487 'QY00001': auth_result
['data']['u'],
489 api_video_url
+= '?' if '?' not in api_video_url
else '&'
490 api_video_url
+= compat_urllib_parse
.urlencode(param
)
491 js
= self
._download
_json
(
492 api_video_url
, video_id
,
493 note
='Download video info of segment %d for format %s' % (segment_index
+ 1, format_id
))
496 (video_url
, filesize
))
498 video_urls_dict
[format_id
] = video_urls
499 return video_urls_dict
def get_format(self, bid):
    """Look up the format id paired with *bid* in ``self._FORMATS_MAP``.

    ``self._FORMATS_MAP`` is iterated as ``(bid, format_id)`` pairs and
    *bid* is compared after conversion with ``str``.

    :param bid: bid value; converted with ``str`` before comparison.
    :returns: the format id of the first matching pair, or ``None`` when
        no pair matches.
    """
    wanted = str(bid)
    return next(
        (fmt for known_bid, fmt in self._FORMATS_MAP if known_bid == wanted),
        None)
def get_bid(self, format_id):
    """Look up the bid paired with *format_id* in ``self._FORMATS_MAP``.

    ``self._FORMATS_MAP`` is iterated as ``(bid, format_id)`` pairs.

    :param format_id: format id to search for (compared with ``==``).
    :returns: the bid of the first matching pair, or ``None`` when no
        pair matches.
    """
    return next(
        (bid for bid, known_fmt in self._FORMATS_MAP if known_fmt == format_id),
        None)
509 def get_raw_data(self
, tvid
, video_id
, enc_key
, _uuid
):
510 tm
= str(int(time
.time()))
514 'src': md5_text('youtube-dl'),
519 'enc': md5_text(enc_key
+ tail
),
521 'tn': random
.random(),
523 'authkey': md5_text(md5_text('') + tail
),
527 api_url
= 'http://cache.video.qiyi.com/vms' + '?' + \
528 compat_urllib_parse
.urlencode(param
)
529 raw_data
= self
._download
_json
(api_url
, video_id
)
532 def get_enc_key(self
, swf_url
, video_id
):
533 # TODO: automatic key extraction
534 # last update at 2016-01-22 for Zombie::bite
535 enc_key
= '6ab6d0280511493ba85594779759d4ed'
538 def _extract_playlist(self
, webpage
):
542 r
'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
547 album_id
= self
._search
_regex
(
548 r
'albumId\s*:\s*(\d+),', webpage
, 'album ID')
549 album_title
= self
._search
_regex
(
550 r
'data-share-title="([^"]+)"', webpage
, 'album title', fatal
=False)
552 entries
= list(map(self
.url_result
, links
))
554 # Start from 2 because links in the first page are already on webpage
555 for page_num
in itertools
.count(2):
556 pagelist_page
= self
._download
_webpage
(
557 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id
, page_num
, PAGE_SIZE
),
559 note
='Download playlist page %d' % page_num
,
560 errnote
='Failed to download playlist page %d' % page_num
)
561 pagelist
= self
._parse
_json
(
562 remove_start(pagelist_page
, 'var tvInfoJs='), album_id
)
563 vlist
= pagelist
['data']['vlist']
565 entries
.append(self
.url_result(item
['vurl']))
566 if len(vlist
) < PAGE_SIZE
:
569 return self
.playlist_result(entries
, album_id
, album_title
)
571 def _real_extract(self
, url
):
572 webpage
= self
._download
_webpage
(
573 url
, 'temp_id', note
='download video page')
575 # There's no simple way to determine whether a URL is a playlist or not
577 playlist_result
= self
._extract
_playlist
(webpage
)
579 return playlist_result
581 tvid
= self
._search
_regex
(
582 r
'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
583 video_id = self._search_regex(
584 r'data-player-videoid\s*=\s*[\'"]([a
-f\d
]+)', webpage, 'video_id
')
585 swf_url = self._search_regex(
586 r'(http
://[^
\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
587 _uuid = uuid.uuid4().hex
589 enc_key = self.get_enc_key(swf_url, video_id)
591 raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
593 if raw_data['code'] != 'A000000':
594 raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
596 data = raw_data['data']
598 title = data['vi']['vn']
600 # generate video_urls_dict
601 video_urls_dict = self.construct_video_urls(
602 data, video_id, _uuid, tvid)
606 for format_id in video_urls_dict:
607 video_urls = video_urls_dict[format_id]
608 for i, video_url_info in enumerate(video_urls):
609 if len(entries) < i + 1:
610 entries.append({'formats': []})
611 entries[i]['formats'].append(
613 'url': video_url_info[0],
614 'filesize': video_url_info[-1],
615 'format_id': format_id,
616 'preference': int(self.get_bid(format_id))
620 for i in range(len(entries)):
621 self._sort_formats(entries[i]['formats'])
624 'id': '%s_part%d' % (video_id, i + 1),
631 '_type': 'multi_video',
638 info['id'] = video_id
639 info['title'] = title