2 from __future__ 
import unicode_literals
 
  13 from .common 
import InfoExtractor
 
  14 from ..compat 
import ( 
  17     compat_urllib_parse_urlencode
, 
  18     compat_urllib_parse_urlparse
, 
  32     return hashlib
.md5(text
.encode('utf-8')).hexdigest() 
  35 class IqiyiSDK(object): 
  36     def __init__(self
, target
, ip
, timestamp
): 
  39         self
.timestamp 
= timestamp
 
  43         return compat_str(sum(map(lambda p
: int(p
, 16), list(data
)))) 
  47         if isinstance(num
, int): 
  49         return compat_str(sum(map(int, num
))) 
  52         even 
= self
.digit_sum(compat_str(self
.timestamp
)[::2]) 
  53         odd 
= self
.digit_sum(compat_str(self
.timestamp
)[1::2]) 
  56     def preprocess(self
, chunksize
): 
  57         self
.target 
= md5_text(self
.target
) 
  59         for i 
in range(32 // chunksize
): 
  60             chunks
.append(self
.target
[chunksize 
* i
:chunksize 
* (i 
+ 1)]) 
  62             chunks
.append(self
.target
[32 - 32 % chunksize
:]) 
  63         return chunks
, list(map(int, self
.ip
.split('.'))) 
  65     def mod(self
, modulus
): 
  66         chunks
, ip 
= self
.preprocess(32) 
  67         self
.target 
= chunks
[0] + ''.join(map(lambda p
: compat_str(p 
% modulus
), ip
)) 
  69     def split(self
, chunksize
): 
  76         chunks
, ip 
= self
.preprocess(chunksize
) 
  78         for i 
in range(len(chunks
)): 
  79             ip_part 
= compat_str(ip
[i
] % modulus_map
[chunksize
]) if i 
< 4 else '' 
  81                 ret 
+= ip_part 
+ chunks
[i
] 
  83                 ret 
+= chunks
[i
] + ip_part
 
  86     def handle_input16(self
): 
  87         self
.target 
= md5_text(self
.target
) 
  88         self
.target 
= self
.split_sum(self
.target
[:16]) + self
.target 
+ self
.split_sum(self
.target
[16:]) 
  90     def handle_input8(self
): 
  91         self
.target 
= md5_text(self
.target
) 
  94             part 
= self
.target
[8 * i
:8 * (i 
+ 1)] 
  95             ret 
+= self
.split_sum(part
) + part
 
  99         self
.target 
= md5_text(self
.target
) 
 100         self
.target 
= self
.split_sum(self
.target
) + self
.target
 
 102     def date(self
, scheme
): 
 103         self
.target 
= md5_text(self
.target
) 
 104         d 
= time
.localtime(self
.timestamp
) 
 106             'y': compat_str(d
.tm_year
), 
 107             'm': '%02d' % d
.tm_mon
, 
 108             'd': '%02d' % d
.tm_mday
, 
 110         self
.target 
+= ''.join(map(lambda c
: strings
[c
], list(scheme
))) 
 112     def split_time_even_odd(self
): 
 113         even
, odd 
= self
.even_odd() 
 114         self
.target 
= odd 
+ md5_text(self
.target
) + even
 
 116     def split_time_odd_even(self
): 
 117         even
, odd 
= self
.even_odd() 
 118         self
.target 
= even 
+ md5_text(self
.target
) + odd
 
 120     def split_ip_time_sum(self
): 
 121         chunks
, ip 
= self
.preprocess(32) 
 122         self
.target 
= compat_str(sum(ip
)) + chunks
[0] + self
.digit_sum(self
.timestamp
) 
 124     def split_time_ip_sum(self
): 
 125         chunks
, ip 
= self
.preprocess(32) 
 126         self
.target 
= self
.digit_sum(self
.timestamp
) + chunks
[0] + compat_str(sum(ip
)) 
 129 class IqiyiSDKInterpreter(object): 
 130     def __init__(self
, sdk_code
): 
 131         self
.sdk_code 
= sdk_code
 
 133     def run(self
, target
, ip
, timestamp
): 
 134         self
.sdk_code 
= decode_packed_codes(self
.sdk_code
) 
 136         functions 
= re
.findall(r
'input=([a-zA-Z0-9]+)\(input', self
.sdk_code
) 
 138         sdk 
= IqiyiSDK(target
, ip
, timestamp
) 
 141             'handleSum': sdk
.handleSum
, 
 142             'handleInput8': sdk
.handle_input8
, 
 143             'handleInput16': sdk
.handle_input16
, 
 144             'splitTimeEvenOdd': sdk
.split_time_even_odd
, 
 145             'splitTimeOddEven': sdk
.split_time_odd_even
, 
 146             'splitIpTimeSum': sdk
.split_ip_time_sum
, 
 147             'splitTimeIpSum': sdk
.split_time_ip_sum
, 
 149         for function 
in functions
: 
 150             if re
.match(r
'mod\d+', function
): 
 151                 sdk
.mod(int(function
[3:])) 
 152             elif re
.match(r
'date[ymd]{3}', function
): 
 153                 sdk
.date(function
[4:]) 
 154             elif re
.match(r
'split\d+', function
): 
 155                 sdk
.split(int(function
[5:])) 
 156             elif function 
in other_functions
: 
 157                 other_functions
[function
]() 
 159                 raise ExtractorError('Unknown funcion %s' % function
) 
 164 class IqiyiIE(InfoExtractor
): 
 168     _VALID_URL 
= r
'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html' 
 170     _NETRC_MACHINE 
= 'iqiyi' 
 173         'url': 'http://www.iqiyi.com/v_19rrojlavg.html', 
 174         'md5': '2cb594dc2781e6c941a110d8f358118b', 
 176             'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', 
 177             'title': '美国德州空中惊现奇异云团 酷似UFO', 
 181         'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', 
 183             'id': 'e3f585b550a280af23c98b6cb2be19fb', 
 184             'title': '名侦探柯南第752集', 
 188                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', 
 190                 'title': '名侦探柯南第752集', 
 194                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', 
 196                 'title': '名侦探柯南第752集', 
 200                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', 
 202                 'title': '名侦探柯南第752集', 
 206                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', 
 208                 'title': '名侦探柯南第752集', 
 212                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', 
 214                 'title': '名侦探柯南第752集', 
 218                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', 
 220                 'title': '名侦探柯南第752集', 
 224                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', 
 226                 'title': '名侦探柯南第752集', 
 230                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', 
 232                 'title': '名侦探柯南第752集', 
 236             'skip_download': True, 
 239         'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', 
 240         'only_matching': True, 
 242         'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html', 
 243         'only_matching': True, 
 245         'url': 'http://yule.iqiyi.com/pcb.html', 
 246         'only_matching': True, 
 248         # VIP-only video. The first 2 parts (6 minutes) are available without login 
 249         # MD5 sums omitted as values are different on Travis CI and my machine 
 250         'url': 'http://www.iqiyi.com/v_19rrny4w8w.html', 
 252             'id': 'f3cf468b39dddb30d676f89a91200dc1', 
 257                 'id': 'f3cf468b39dddb30d676f89a91200dc1_part1', 
 263                 'id': 'f3cf468b39dddb30d676f89a91200dc1_part2', 
 268         'expected_warnings': ['Needs a VIP account for full video'], 
 270         'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html', 
 275         'playlist_count': 101, 
 277         'url': 'http://www.pps.tv/w_19rrbav0ph.html', 
 278         'only_matching': True, 
 291         # No preview available (不允许试看鉴权失败) 
 292         'Q00505': 'This video requires a VIP account', 
 293         # End of preview time (试看结束鉴权失败) 
 294         'Q00506': 'Needs a VIP account for full video', 
 297     def _real_initialize(self
): 
 302         # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js 
 303         N 
= 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd 
 306         return ohdave_rsa_encrypt(data
, e
, N
) 
 309         (username
, password
) = self
._get
_login
_info
() 
 311         # No authentication to be performed 
 315         data 
= self
._download
_json
( 
 316             'http://kylin.iqiyi.com/get_token', None, 
 317             note
='Get token for logging', errnote
='Unable to get token for logging') 
 319         timestamp 
= int(time
.time()) 
 320         target 
= '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % ( 
 321             username
, self
._rsa
_fun
(password
.encode('utf-8'))) 
 323         interp 
= IqiyiSDKInterpreter(sdk
) 
 324         sign 
= interp
.run(target
, data
['ip'], timestamp
) 
 326         validation_params 
= { 
 328             'server': 'BEA3AA1908656AABCCFF76582C4C6660', 
 329             'token': data
['token'], 
 330             'bird_src': 'f8d91d57af224da7893dd397d52d811a', 
 334         validation_result 
= self
._download
_json
( 
 335             'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params
), None, 
 336             note
='Validate credentials', errnote
='Unable to validate credentials') 
 339             'P00107': 'please login via the web interface and enter the CAPTCHA code', 
 340             'P00117': 'bad username or password', 
 343         code 
= validation_result
['code'] 
 345             msg 
= MSG_MAP
.get(code
) 
 347                 msg 
= 'error %s' % code
 
 348                 if validation_result
.get('msg'): 
 349                     msg 
+= ': ' + validation_result
['msg'] 
 350             self
._downloader
.report_warning('unable to log in: ' + msg
) 
 355     def _authenticate_vip_video(self
, api_video_url
, video_id
, tvid
, _uuid
, do_report_warning
): 
 357             # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as 
 359             'platform': 'b6c13e26323c537d', 
 364             'playType': 'main',  # XXX: always main? 
 365             'filename': os
.path
.splitext(url_basename(api_video_url
))[0], 
 368         qd_items 
= compat_parse_qs(compat_urllib_parse_urlparse(api_video_url
).query
) 
 369         for key
, val 
in qd_items
.items(): 
 370             auth_params
[key
] = val
[0] 
 372         auth_req 
= sanitized_Request( 
 373             'http://api.vip.iqiyi.com/services/ckn.action', 
 374             urlencode_postdata(auth_params
)) 
 375         # iQiyi server throws HTTP 405 error without the following header 
 376         auth_req
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 377         auth_result 
= self
._download
_json
( 
 379             note
='Downloading video authentication JSON', 
 380             errnote
='Unable to download video authentication JSON') 
 382         code 
= auth_result
.get('code') 
 383         msg 
= self
.AUTH_API_ERRORS
.get(code
) or auth_result
.get('msg') or code
 
 385             if do_report_warning
: 
 386                 self
.report_warning(msg
) 
 388         if 'data' not in auth_result
: 
 390                 raise ExtractorError('%s said: %s' % (self
.IE_NAME
, msg
), expected
=True) 
 391             raise ExtractorError('Unexpected error from Iqiyi auth API') 
 393         return auth_result
['data'] 
 395     def construct_video_urls(self
, data
, video_id
, _uuid
, tvid
): 
 404         def get_encode_code(l
): 
 409             for i 
in range(c 
- 1, -1, -1): 
 410                 a 
= do_xor(int(b
[c 
- i 
- 1], 16), i
) 
 414         def get_path_key(x
, format_id
, segment_index
): 
 415             mg 
= ')(*&^flash@#$%a' 
 416             tm 
= self
._download
_json
( 
 417                 'http://data.video.qiyi.com/t?tn=' + str(random
.random()), video_id
, 
 418                 note
='Download path key of segment %d for format %s' % (segment_index 
+ 1, format_id
) 
 420             t 
= str(int(math
.floor(int(tm
) / (600.0)))) 
 421             return md5_text(t 
+ mg 
+ x
) 
 424         need_vip_warning_report 
= True 
 425         for format_item 
in data
['vp']['tkl'][0]['vs']: 
 426             if 0 < int(format_item
['bid']) <= 10: 
 427                 format_id 
= self
.get_format(format_item
['bid']) 
 433             video_urls_info 
= format_item
['fs'] 
 434             if not format_item
['fs'][0]['l'].startswith('/'): 
 435                 t 
= get_encode_code(format_item
['fs'][0]['l']) 
 436                 if t
.endswith('mp4'): 
 437                     video_urls_info 
= format_item
['flvs'] 
 439             for segment_index
, segment 
in enumerate(video_urls_info
): 
 441                 if not vl
.startswith('/'): 
 442                     vl 
= get_encode_code(vl
) 
 443                 is_vip_video 
= '/vip/' in vl
 
 444                 filesize 
= segment
['b'] 
 445                 base_url 
= data
['vp']['du'].split('/') 
 448                         vl
.split('/')[-1].split('.')[0], format_id
, segment_index
) 
 449                     base_url
.insert(-1, key
) 
 450                 base_url 
= '/'.join(base_url
) 
 453                     'qyid': uuid
.uuid4().hex, 
 458                     'tn': str(int(time
.time())) 
 460                 api_video_url 
= base_url 
+ vl
 
 462                     api_video_url 
= api_video_url
.replace('.f4v', '.hml') 
 463                     auth_result 
= self
._authenticate
_vip
_video
( 
 464                         api_video_url
, video_id
, tvid
, _uuid
, need_vip_warning_report
) 
 465                     if auth_result 
is False: 
 466                         need_vip_warning_report 
= False 
 469                         't': auth_result
['t'], 
 470                         # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as 
 471                         'cid': 'afbe8fd3d73448c9', 
 473                         'QY00001': auth_result
['u'], 
 475                 api_video_url 
+= '?' if '?' not in api_video_url 
else '&' 
 476                 api_video_url 
+= compat_urllib_parse_urlencode(param
) 
 477                 js 
= self
._download
_json
( 
 478                     api_video_url
, video_id
, 
 479                     note
='Download video info of segment %d for format %s' % (segment_index 
+ 1, format_id
)) 
 482                     (video_url
, filesize
)) 
 484             video_urls_dict
[format_id
] = video_urls
 
 485         return video_urls_dict
 
 487     def get_format(self
, bid
): 
 488         matched_format_ids 
= [_format_id 
for _bid
, _format_id 
in self
._FORMATS
_MAP 
if _bid 
== str(bid
)] 
 489         return matched_format_ids
[0] if len(matched_format_ids
) else None 
 491     def get_bid(self
, format_id
): 
 492         matched_bids 
= [_bid 
for _bid
, _format_id 
in self
._FORMATS
_MAP 
if _format_id 
== format_id
] 
 493         return matched_bids
[0] if len(matched_bids
) else None 
 495     def get_raw_data(self
, tvid
, video_id
, enc_key
, _uuid
): 
 496         tm 
= str(int(time
.time())) 
 500             'src': md5_text('youtube-dl'), 
 505             'enc': md5_text(enc_key 
+ tail
), 
 507             'tn': random
.random(), 
 508             # In iQiyi's flash player, um is set to 1 if there's a logged user 
 509             # Some 1080P formats are only available with a logged user. 
 510             # Here force um=1 to trick the iQiyi server 
 512             'authkey': md5_text(md5_text('') + tail
), 
 516         api_url 
= 'http://cache.video.qiyi.com/vms' + '?' + \
 
 517             compat_urllib_parse_urlencode(param
) 
 518         raw_data 
= self
._download
_json
(api_url
, video_id
) 
 521     def get_enc_key(self
, video_id
): 
 522         # TODO: automatic key extraction 
 523         # last update at 2016-01-22 for Zombie::bite 
 524         enc_key 
= '4a1caba4b4465345366f28da7c117d20' 
 527     def _extract_playlist(self
, webpage
): 
 531             r
'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"', 
 536         album_id 
= self
._search
_regex
( 
 537             r
'albumId\s*:\s*(\d+),', webpage
, 'album ID') 
 538         album_title 
= self
._search
_regex
( 
 539             r
'data-share-title="([^"]+)"', webpage
, 'album title', fatal
=False) 
 541         entries 
= list(map(self
.url_result
, links
)) 
 543         # Start from 2 because links in the first page are already on webpage 
 544         for page_num 
in itertools
.count(2): 
 545             pagelist_page 
= self
._download
_webpage
( 
 546                 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id
, page_num
, PAGE_SIZE
), 
 548                 note
='Download playlist page %d' % page_num
, 
 549                 errnote
='Failed to download playlist page %d' % page_num
) 
 550             pagelist 
= self
._parse
_json
( 
 551                 remove_start(pagelist_page
, 'var tvInfoJs='), album_id
) 
 552             vlist 
= pagelist
['data']['vlist'] 
 554                 entries
.append(self
.url_result(item
['vurl'])) 
 555             if len(vlist
) < PAGE_SIZE
: 
 558         return self
.playlist_result(entries
, album_id
, album_title
) 
 560     def _real_extract(self
, url
): 
 561         webpage 
= self
._download
_webpage
( 
 562             url
, 'temp_id', note
='download video page') 
 564         # There's no simple way to determine whether an URL is a playlist or not 
 566         playlist_result 
= self
._extract
_playlist
(webpage
) 
 568             return playlist_result
 
 570         tvid 
= self
._search
_regex
( 
 571             r
'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') 
 572         video_id = self._search_regex( 
 573             r'data-player-videoid\s*=\s*[\'"]([a
-f\d
]+)', webpage, 'video_id
') 
 574         _uuid = uuid.uuid4().hex 
 576         enc_key = self.get_enc_key(video_id) 
 578         raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) 
 580         if raw_data['code
'] != 'A000000
': 
 581             raise ExtractorError('Unable to load data
. Error code
: ' + raw_data['code
']) 
 583         data = raw_data['data
'] 
 585         title = data['vi
']['vn
'] 
 587         # generate video_urls_dict 
 588         video_urls_dict = self.construct_video_urls( 
 589             data, video_id, _uuid, tvid) 
 593         for format_id in video_urls_dict: 
 594             video_urls = video_urls_dict[format_id] 
 595             for i, video_url_info in enumerate(video_urls): 
 596                 if len(entries) < i + 1: 
 597                     entries.append({'formats
': []}) 
 598                 entries[i]['formats
'].append( 
 600                         'url
': video_url_info[0], 
 601                         'filesize
': video_url_info[-1], 
 602                         'format_id
': format_id, 
 603                         'preference
': int(self.get_bid(format_id)) 
 607         for i in range(len(entries)): 
 608             self._sort_formats(entries[i]['formats
']) 
 611                     'id': '%s_part
%d' % (video_id, i + 1), 
 618                 '_type
': 'multi_video
', 
 625             info['id'] = video_id 
 626             info['title
'] = title