2 from __future__
import unicode_literals
13 from .common
import InfoExtractor
14 from ..compat
import (
17 compat_urllib_parse_urlencode
,
18 compat_urllib_parse_urlparse
,
32 return hashlib
.md5(text
.encode('utf-8')).hexdigest()
35 class IqiyiSDK(object):
36 def __init__(self
, target
, ip
, timestamp
):
39 self
.timestamp
= timestamp
43 return compat_str(sum(map(lambda p
: int(p
, 16), list(data
))))
47 if isinstance(num
, int):
49 return compat_str(sum(map(int, num
)))
52 even
= self
.digit_sum(compat_str(self
.timestamp
)[::2])
53 odd
= self
.digit_sum(compat_str(self
.timestamp
)[1::2])
56 def preprocess(self
, chunksize
):
57 self
.target
= md5_text(self
.target
)
59 for i
in range(32 // chunksize
):
60 chunks
.append(self
.target
[chunksize
* i
:chunksize
* (i
+ 1)])
62 chunks
.append(self
.target
[32 - 32 % chunksize
:])
63 return chunks
, list(map(int, self
.ip
.split('.')))
def mod(self, modulus):
    """Replace ``self.target`` with its first hash chunk plus reduced IP parts.

    ``self.preprocess(32)`` supplies the chunk list and the IP address as a
    list of integers. Each integer is reduced modulo ``modulus``, converted
    to text, and the results are appended (in order, without separators)
    to the first chunk.

    :param modulus: divisor applied to every integer part of the IP.
    """
    hashed_chunks, octets = self.preprocess(32)
    reduced_parts = [compat_str(octet % modulus) for octet in octets]
    self.target = hashed_chunks[0] + ''.join(reduced_parts)
69 def split(self
, chunksize
):
76 chunks
, ip
= self
.preprocess(chunksize
)
78 for i
in range(len(chunks
)):
79 ip_part
= compat_str(ip
[i
] % modulus_map
[chunksize
]) if i
< 4 else ''
81 ret
+= ip_part
+ chunks
[i
]
83 ret
+= chunks
[i
] + ip_part
def handle_input16(self):
    """Hash ``self.target`` and wrap the hash with the sums of its halves.

    The target is first replaced by ``md5_text`` of itself. The result of
    ``self.split_sum`` over the first 16 characters is then prepended and
    the result of ``self.split_sum`` over the remaining characters is
    appended.
    """
    digest = md5_text(self.target)
    # Store the hash before calling split_sum, exactly as the original did.
    self.target = digest
    leading_sum = self.split_sum(digest[:16])
    trailing_sum = self.split_sum(digest[16:])
    self.target = leading_sum + digest + trailing_sum
90 def handle_input8(self
):
91 self
.target
= md5_text(self
.target
)
94 part
= self
.target
[8 * i
:8 * (i
+ 1)]
95 ret
+= self
.split_sum(part
) + part
99 self
.target
= md5_text(self
.target
)
100 self
.target
= self
.split_sum(self
.target
) + self
.target
102 def date(self
, scheme
):
103 self
.target
= md5_text(self
.target
)
104 d
= time
.localtime(self
.timestamp
)
106 'y': compat_str(d
.tm_year
),
107 'm': '%02d' % d
.tm_mon
,
108 'd': '%02d' % d
.tm_mday
,
110 self
.target
+= ''.join(map(lambda c
: strings
[c
], list(scheme
)))
def split_time_even_odd(self):
    """Surround the hashed target with the two values from ``even_odd()``.

    The second value returned by ``self.even_odd()`` (``odd``) goes in
    front of ``md5_text(self.target)`` and the first value (``even``)
    goes after it.
    """
    even_sum, odd_sum = self.even_odd()
    digest = md5_text(self.target)
    self.target = odd_sum + digest + even_sum
def split_time_odd_even(self):
    """Surround the hashed target with the two values from ``even_odd()``.

    The first value returned by ``self.even_odd()`` (``even``) goes in
    front of ``md5_text(self.target)`` and the second value (``odd``)
    goes after it.
    """
    even_sum, odd_sum = self.even_odd()
    digest = md5_text(self.target)
    self.target = even_sum + digest + odd_sum
def split_ip_time_sum(self):
    """Set the target to ``<IP sum><first chunk><timestamp digit sum>``.

    ``self.preprocess(32)`` supplies the chunk list and the IP address as a
    list of integers. The integers are summed and rendered as text, the
    first chunk follows, and ``self.digit_sum(self.timestamp)`` comes last.
    """
    hashed_chunks, octets = self.preprocess(32)
    ip_total = compat_str(sum(octets))
    first_chunk = hashed_chunks[0]
    time_total = self.digit_sum(self.timestamp)
    self.target = ip_total + first_chunk + time_total
def split_time_ip_sum(self):
    """Set the target to ``<timestamp digit sum><first chunk><IP sum>``.

    ``self.preprocess(32)`` supplies the chunk list and the IP address as a
    list of integers. ``self.digit_sum(self.timestamp)`` comes first, the
    first chunk follows, and the sum of the IP integers (as text) is last.
    """
    hashed_chunks, octets = self.preprocess(32)
    time_total = self.digit_sum(self.timestamp)
    first_chunk = hashed_chunks[0]
    ip_total = compat_str(sum(octets))
    self.target = time_total + first_chunk + ip_total
129 class IqiyiSDKInterpreter(object):
def __init__(self, sdk_code):
    """Keep the SDK code so that ``run`` can process it later.

    :param sdk_code: SDK source; stored unchanged on the instance.
    """
    self.sdk_code = sdk_code
133 def run(self
, target
, ip
, timestamp
):
134 self
.sdk_code
= decode_packed_codes(self
.sdk_code
)
136 functions
= re
.findall(r
'input=([a-zA-Z0-9]+)\(input', self
.sdk_code
)
138 sdk
= IqiyiSDK(target
, ip
, timestamp
)
141 'handleSum': sdk
.handleSum
,
142 'handleInput8': sdk
.handle_input8
,
143 'handleInput16': sdk
.handle_input16
,
144 'splitTimeEvenOdd': sdk
.split_time_even_odd
,
145 'splitTimeOddEven': sdk
.split_time_odd_even
,
146 'splitIpTimeSum': sdk
.split_ip_time_sum
,
147 'splitTimeIpSum': sdk
.split_time_ip_sum
,
149 for function
in functions
:
150 if re
.match(r
'mod\d+', function
):
151 sdk
.mod(int(function
[3:]))
152 elif re
.match(r
'date[ymd]{3}', function
):
153 sdk
.date(function
[4:])
154 elif re
.match(r
'split\d+', function
):
155 sdk
.split(int(function
[5:]))
156 elif function
in other_functions
:
157 other_functions
[function
]()
159 raise ExtractorError('Unknown funcion %s' % function
)
164 class IqiyiIE(InfoExtractor
):
168 _VALID_URL
= r
'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
170 _NETRC_MACHINE
= 'iqiyi'
173 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
174 'md5': '2cb594dc2781e6c941a110d8f358118b',
176 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
177 'title': '美国德州空中惊现奇异云团 酷似UFO',
181 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
183 'id': 'e3f585b550a280af23c98b6cb2be19fb',
184 'title': '名侦探柯南第752集',
188 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
190 'title': '名侦探柯南第752集',
194 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
196 'title': '名侦探柯南第752集',
200 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
202 'title': '名侦探柯南第752集',
206 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
208 'title': '名侦探柯南第752集',
212 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
214 'title': '名侦探柯南第752集',
218 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
220 'title': '名侦探柯南第752集',
224 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
226 'title': '名侦探柯南第752集',
230 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
232 'title': '名侦探柯南第752集',
236 'skip_download': True,
239 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
240 'only_matching': True,
242 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
243 'only_matching': True,
245 'url': 'http://yule.iqiyi.com/pcb.html',
246 'only_matching': True,
248 # VIP-only video. The first 2 parts (6 minutes) are available without login
249 # MD5 sums omitted as values are different on Travis CI and my machine
250 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
252 'id': 'f3cf468b39dddb30d676f89a91200dc1',
257 'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
263 'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
268 'expected_warnings': ['Needs a VIP account for full video'],
270 'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
275 'playlist_count': 101,
277 'url': 'http://www.pps.tv/w_19rrbav0ph.html',
278 'only_matching': True,
291 # No preview available (不允许试看鉴权失败)
292 'Q00505': 'This video requires a VIP account',
293 # End of preview time (试看结束鉴权失败)
294 'Q00506': 'Needs a VIP account for full video',
297 def _real_initialize(self
):
302 # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
303 N
= 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
306 return ohdave_rsa_encrypt(data
, e
, N
)
309 (username
, password
) = self
._get
_login
_info
()
311 # No authentication to be performed
315 data
= self
._download
_json
(
316 'http://kylin.iqiyi.com/get_token', None,
317 note
='Get token for logging', errnote
='Unable to get token for logging')
319 timestamp
= int(time
.time())
320 target
= '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
321 username
, self
._rsa
_fun
(password
.encode('utf-8')))
323 interp
= IqiyiSDKInterpreter(sdk
)
324 sign
= interp
.run(target
, data
['ip'], timestamp
)
326 validation_params
= {
328 'server': 'BEA3AA1908656AABCCFF76582C4C6660',
329 'token': data
['token'],
330 'bird_src': 'f8d91d57af224da7893dd397d52d811a',
334 validation_result
= self
._download
_json
(
335 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params
), None,
336 note
='Validate credentials', errnote
='Unable to validate credentials')
339 'P00107': 'please login via the web interface and enter the CAPTCHA code',
340 'P00117': 'bad username or password',
343 code
= validation_result
['code']
345 msg
= MSG_MAP
.get(code
)
347 msg
= 'error %s' % code
348 if validation_result
.get('msg'):
349 msg
+= ': ' + validation_result
['msg']
350 self
._downloader
.report_warning('unable to log in: ' + msg
)
355 def _authenticate_vip_video(self
, api_video_url
, video_id
, tvid
, _uuid
, do_report_warning
):
357 # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
359 'platform': 'b6c13e26323c537d',
364 'playType': 'main', # XXX: always main?
365 'filename': os
.path
.splitext(url_basename(api_video_url
))[0],
368 qd_items
= compat_parse_qs(compat_urllib_parse_urlparse(api_video_url
).query
)
369 for key
, val
in qd_items
.items():
370 auth_params
[key
] = val
[0]
372 auth_req
= sanitized_Request(
373 'http://api.vip.iqiyi.com/services/ckn.action',
374 urlencode_postdata(auth_params
))
375 # iQiyi server throws HTTP 405 error without the following header
376 auth_req
.add_header('Content-Type', 'application/x-www-form-urlencoded')
377 auth_result
= self
._download
_json
(
379 note
='Downloading video authentication JSON',
380 errnote
='Unable to download video authentication JSON')
382 code
= auth_result
.get('code')
383 msg
= self
.AUTH_API_ERRORS
.get(code
) or auth_result
.get('msg') or code
385 if do_report_warning
:
386 self
.report_warning(msg
)
388 if 'data' not in auth_result
:
390 raise ExtractorError('%s said: %s' % (self
.IE_NAME
, msg
), expected
=True)
391 raise ExtractorError('Unexpected error from Iqiyi auth API')
393 return auth_result
['data']
395 def construct_video_urls(self
, data
, video_id
, _uuid
, tvid
):
404 def get_encode_code(l
):
409 for i
in range(c
- 1, -1, -1):
410 a
= do_xor(int(b
[c
- i
- 1], 16), i
)
414 def get_path_key(x
, format_id
, segment_index
):
415 mg
= ')(*&^flash@#$%a'
416 tm
= self
._download
_json
(
417 'http://data.video.qiyi.com/t?tn=' + str(random
.random()), video_id
,
418 note
='Download path key of segment %d for format %s' % (segment_index
+ 1, format_id
)
420 t
= str(int(math
.floor(int(tm
) / (600.0))))
421 return md5_text(t
+ mg
+ x
)
424 need_vip_warning_report
= True
425 for format_item
in data
['vp']['tkl'][0]['vs']:
426 if 0 < int(format_item
['bid']) <= 10:
427 format_id
= self
.get_format(format_item
['bid'])
433 video_urls_info
= format_item
['fs']
434 if not format_item
['fs'][0]['l'].startswith('/'):
435 t
= get_encode_code(format_item
['fs'][0]['l'])
436 if t
.endswith('mp4'):
437 video_urls_info
= format_item
['flvs']
439 for segment_index
, segment
in enumerate(video_urls_info
):
441 if not vl
.startswith('/'):
442 vl
= get_encode_code(vl
)
443 is_vip_video
= '/vip/' in vl
444 filesize
= segment
['b']
445 base_url
= data
['vp']['du'].split('/')
448 vl
.split('/')[-1].split('.')[0], format_id
, segment_index
)
449 base_url
.insert(-1, key
)
450 base_url
= '/'.join(base_url
)
453 'qyid': uuid
.uuid4().hex,
458 'tn': str(int(time
.time()))
460 api_video_url
= base_url
+ vl
462 api_video_url
= api_video_url
.replace('.f4v', '.hml')
463 auth_result
= self
._authenticate
_vip
_video
(
464 api_video_url
, video_id
, tvid
, _uuid
, need_vip_warning_report
)
465 if auth_result
is False:
466 need_vip_warning_report
= False
469 't': auth_result
['t'],
470 # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
471 'cid': 'afbe8fd3d73448c9',
473 'QY00001': auth_result
['u'],
475 api_video_url
+= '?' if '?' not in api_video_url
else '&'
476 api_video_url
+= compat_urllib_parse_urlencode(param
)
477 js
= self
._download
_json
(
478 api_video_url
, video_id
,
479 note
='Download video info of segment %d for format %s' % (segment_index
+ 1, format_id
))
482 (video_url
, filesize
))
484 video_urls_dict
[format_id
] = video_urls
485 return video_urls_dict
def get_format(self, bid):
    """Return the format id paired with ``bid`` in ``self._FORMATS_MAP``.

    ``self._FORMATS_MAP`` is iterated as ``(bid, format_id)`` pairs and
    ``bid`` is compared in its ``str()`` form.

    :param bid: bid to look up; converted with ``str()`` before comparing.
    :return: the format id of the first matching pair, or ``None`` when no
        pair matches.
    """
    # Convert once instead of on every iteration.
    wanted_bid = str(bid)
    # Stop at the first hit rather than collecting every match in a list.
    return next(
        (_format_id for _bid, _format_id in self._FORMATS_MAP
         if _bid == wanted_bid),
        None)
def get_bid(self, format_id):
    """Return the bid paired with ``format_id`` in ``self._FORMATS_MAP``.

    ``self._FORMATS_MAP`` is iterated as ``(bid, format_id)`` pairs.

    :param format_id: format id to look up (compared with ``==``).
    :return: the bid of the first matching pair, or ``None`` when no pair
        matches.
    """
    # Stop at the first hit rather than collecting every match in a list.
    return next(
        (_bid for _bid, _format_id in self._FORMATS_MAP
         if _format_id == format_id),
        None)
495 def get_raw_data(self
, tvid
, video_id
, enc_key
, _uuid
):
496 tm
= str(int(time
.time()))
500 'src': md5_text('youtube-dl'),
505 'enc': md5_text(enc_key
+ tail
),
507 'tn': random
.random(),
508 # In iQiyi's flash player, um is set to 1 if there's a logged user
509 # Some 1080P formats are only available with a logged user.
510 # Here force um=1 to trick the iQiyi server
512 'authkey': md5_text(md5_text('') + tail
),
516 api_url
= 'http://cache.video.qiyi.com/vms' + '?' + \
517 compat_urllib_parse_urlencode(param
)
518 raw_data
= self
._download
_json
(api_url
, video_id
)
521 def get_enc_key(self
, video_id
):
522 # TODO: automatic key extraction
523 # last update at 2016-01-22 for Zombie::bite
524 enc_key
= '4a1caba4b4465345366f28da7c117d20'
527 def _extract_playlist(self
, webpage
):
531 r
'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
536 album_id
= self
._search
_regex
(
537 r
'albumId\s*:\s*(\d+),', webpage
, 'album ID')
538 album_title
= self
._search
_regex
(
539 r
'data-share-title="([^"]+)"', webpage
, 'album title', fatal
=False)
541 entries
= list(map(self
.url_result
, links
))
543 # Start from 2 because links in the first page are already on webpage
544 for page_num
in itertools
.count(2):
545 pagelist_page
= self
._download
_webpage
(
546 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id
, page_num
, PAGE_SIZE
),
548 note
='Download playlist page %d' % page_num
,
549 errnote
='Failed to download playlist page %d' % page_num
)
550 pagelist
= self
._parse
_json
(
551 remove_start(pagelist_page
, 'var tvInfoJs='), album_id
)
552 vlist
= pagelist
['data']['vlist']
554 entries
.append(self
.url_result(item
['vurl']))
555 if len(vlist
) < PAGE_SIZE
:
558 return self
.playlist_result(entries
, album_id
, album_title
)
560 def _real_extract(self
, url
):
561 webpage
= self
._download
_webpage
(
562 url
, 'temp_id', note
='download video page')
564 # There's no simple way to determine whether an URL is a playlist or not
566 playlist_result
= self
._extract
_playlist
(webpage
)
568 return playlist_result
570 tvid
= self
._search
_regex
(
571 r
'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
572 video_id = self._search_regex(
573 r'data-player-videoid\s*=\s*[\'"]([a
-f\d
]+)', webpage, 'video_id
')
574 _uuid = uuid.uuid4().hex
576 enc_key = self.get_enc_key(video_id)
578 raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
580 if raw_data['code
'] != 'A000000
':
581 raise ExtractorError('Unable to load data
. Error code
: ' + raw_data['code
'])
583 data = raw_data['data
']
585 title = data['vi
']['vn
']
587 # generate video_urls_dict
588 video_urls_dict = self.construct_video_urls(
589 data, video_id, _uuid, tvid)
593 for format_id in video_urls_dict:
594 video_urls = video_urls_dict[format_id]
595 for i, video_url_info in enumerate(video_urls):
596 if len(entries) < i + 1:
597 entries.append({'formats
': []})
598 entries[i]['formats
'].append(
600 'url
': video_url_info[0],
601 'filesize
': video_url_info[-1],
602 'format_id
': format_id,
603 'preference
': int(self.get_bid(format_id))
607 for i in range(len(entries)):
608 self._sort_formats(entries[i]['formats
'])
611 'id': '%s_part
%d' % (video_id, i + 1),
618 '_type
': 'multi_video
',
625 info['id'] = video_id
626 info['title
'] = title