2 from __future__ 
import unicode_literals
 
  13 from .common 
import InfoExtractor
 
  14 from ..compat 
import ( 
  18     compat_urllib_parse_urlparse
, 
  31     return hashlib
.md5(text
.encode('utf-8')).hexdigest() 
  34 class IqiyiSDK(object): 
  35     def __init__(self
, target
, ip
, timestamp
): 
  38         self
.timestamp 
= timestamp
 
  42         return compat_str(sum(map(lambda p
: int(p
, 16), list(data
)))) 
  46         if isinstance(num
, int): 
  48         return compat_str(sum(map(int, num
))) 
  51         even 
= self
.digit_sum(compat_str(self
.timestamp
)[::2]) 
  52         odd 
= self
.digit_sum(compat_str(self
.timestamp
)[1::2]) 
  55     def preprocess(self
, chunksize
): 
  56         self
.target 
= md5_text(self
.target
) 
  58         for i 
in range(32 // chunksize
): 
  59             chunks
.append(self
.target
[chunksize 
* i
:chunksize 
* (i 
+ 1)]) 
  61             chunks
.append(self
.target
[32 - 32 % chunksize
:]) 
  62         return chunks
, list(map(int, self
.ip
.split('.'))) 
  64     def mod(self
, modulus
): 
  65         chunks
, ip 
= self
.preprocess(32) 
  66         self
.target 
= chunks
[0] + ''.join(map(lambda p
: compat_str(p 
% modulus
), ip
)) 
  68     def split(self
, chunksize
): 
  75         chunks
, ip 
= self
.preprocess(chunksize
) 
  77         for i 
in range(len(chunks
)): 
  78             ip_part 
= compat_str(ip
[i
] % modulus_map
[chunksize
]) if i 
< 4 else '' 
  80                 ret 
+= ip_part 
+ chunks
[i
] 
  82                 ret 
+= chunks
[i
] + ip_part
 
  85     def handle_input16(self
): 
  86         self
.target 
= md5_text(self
.target
) 
  87         self
.target 
= self
.split_sum(self
.target
[:16]) + self
.target 
+ self
.split_sum(self
.target
[16:]) 
  89     def handle_input8(self
): 
  90         self
.target 
= md5_text(self
.target
) 
  93             part 
= self
.target
[8 * i
:8 * (i 
+ 1)] 
  94             ret 
+= self
.split_sum(part
) + part
 
  98         self
.target 
= md5_text(self
.target
) 
  99         self
.target 
= self
.split_sum(self
.target
) + self
.target
 
 101     def date(self
, scheme
): 
 102         self
.target 
= md5_text(self
.target
) 
 103         d 
= time
.localtime(self
.timestamp
) 
 105             'y': compat_str(d
.tm_year
), 
 106             'm': '%02d' % d
.tm_mon
, 
 107             'd': '%02d' % d
.tm_mday
, 
 109         self
.target 
+= ''.join(map(lambda c
: strings
[c
], list(scheme
))) 
 111     def split_time_even_odd(self
): 
 112         even
, odd 
= self
.even_odd() 
 113         self
.target 
= odd 
+ md5_text(self
.target
) + even
 
 115     def split_time_odd_even(self
): 
 116         even
, odd 
= self
.even_odd() 
 117         self
.target 
= even 
+ md5_text(self
.target
) + odd
 
 119     def split_ip_time_sum(self
): 
 120         chunks
, ip 
= self
.preprocess(32) 
 121         self
.target 
= compat_str(sum(ip
)) + chunks
[0] + self
.digit_sum(self
.timestamp
) 
 123     def split_time_ip_sum(self
): 
 124         chunks
, ip 
= self
.preprocess(32) 
 125         self
.target 
= self
.digit_sum(self
.timestamp
) + chunks
[0] + compat_str(sum(ip
)) 
 128 class IqiyiSDKInterpreter(object): 
 129     BASE62_TABLE 
= '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' 
 131     def __init__(self
, sdk_code
): 
 132         self
.sdk_code 
= sdk_code
 
 135     def base62(cls
, num
): 
 140             ret 
= cls
.BASE62_TABLE
[num 
% 62] + ret
 
 144     def decode_eval_codes(self
): 
 145         self
.sdk_code 
= self
.sdk_code
[5:-3] 
 148             r
"'([^']+)',62,(\d+),'([^']+)'\.split\('\|'\),[^,]+,{}", 
 150         obfucasted_code
, count
, symbols 
= mobj
.groups() 
 152         symbols 
= symbols
.split('|') 
 157             b62count 
= self
.base62(count
) 
 158             symbol_table
[b62count
] = symbols
[count
] or b62count
 
 160         self
.sdk_code 
= re
.sub( 
 161             r
'\b(\w+)\b', lambda mobj
: symbol_table
[mobj
.group(0)], 
 164     def run(self
, target
, ip
, timestamp
): 
 165         self
.decode_eval_codes() 
 167         functions 
= re
.findall(r
'input=([a-zA-Z0-9]+)\(input', self
.sdk_code
) 
 169         sdk 
= IqiyiSDK(target
, ip
, timestamp
) 
 172             'handleSum': sdk
.handleSum
, 
 173             'handleInput8': sdk
.handle_input8
, 
 174             'handleInput16': sdk
.handle_input16
, 
 175             'splitTimeEvenOdd': sdk
.split_time_even_odd
, 
 176             'splitTimeOddEven': sdk
.split_time_odd_even
, 
 177             'splitIpTimeSum': sdk
.split_ip_time_sum
, 
 178             'splitTimeIpSum': sdk
.split_time_ip_sum
, 
 180         for function 
in functions
: 
 181             if re
.match(r
'mod\d+', function
): 
 182                 sdk
.mod(int(function
[3:])) 
 183             elif re
.match(r
'date[ymd]{3}', function
): 
 184                 sdk
.date(function
[4:]) 
 185             elif re
.match(r
'split\d+', function
): 
 186                 sdk
.split(int(function
[5:])) 
 187             elif function 
in other_functions
: 
 188                 other_functions
[function
]() 
 190                 raise ExtractorError('Unknown funcion %s' % function
) 
 195 class IqiyiIE(InfoExtractor
): 
 199     _VALID_URL 
= r
'http://(?:[^.]+\.)?iqiyi\.com/.+\.html' 
 201     _NETRC_MACHINE 
= 'iqiyi' 
 204         'url': 'http://www.iqiyi.com/v_19rrojlavg.html', 
 205         'md5': '2cb594dc2781e6c941a110d8f358118b', 
 207             'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', 
 208             'title': '美国德州空中惊现奇异云团 酷似UFO', 
 212         'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', 
 214             'id': 'e3f585b550a280af23c98b6cb2be19fb', 
 215             'title': '名侦探柯南第752集', 
 219                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', 
 221                 'title': '名侦探柯南第752集', 
 225                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', 
 227                 'title': '名侦探柯南第752集', 
 231                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', 
 233                 'title': '名侦探柯南第752集', 
 237                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', 
 239                 'title': '名侦探柯南第752集', 
 243                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', 
 245                 'title': '名侦探柯南第752集', 
 249                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', 
 251                 'title': '名侦探柯南第752集', 
 255                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', 
 257                 'title': '名侦探柯南第752集', 
 261                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', 
 263                 'title': '名侦探柯南第752集', 
 267             'skip_download': True, 
 270         'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', 
 271         'only_matching': True, 
 273         'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html', 
 274         'only_matching': True, 
 276         'url': 'http://yule.iqiyi.com/pcb.html', 
 277         'only_matching': True, 
 279         # VIP-only video. The first 2 parts (6 minutes) are available without login 
 280         # MD5 sums omitted as values are different on Travis CI and my machine 
 281         'url': 'http://www.iqiyi.com/v_19rrny4w8w.html', 
 283             'id': 'f3cf468b39dddb30d676f89a91200dc1', 
 288                 'id': 'f3cf468b39dddb30d676f89a91200dc1_part1', 
 294                 'id': 'f3cf468b39dddb30d676f89a91200dc1_part2', 
 299         'expected_warnings': ['Needs a VIP account for full video'], 
 301         'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html', 
 306         'playlist_count': 101, 
 318     def _real_initialize(self
): 
 323         # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js 
 324         N 
= 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd 
 327         return ohdave_rsa_encrypt(data
, e
, N
) 
 330         (username
, password
) = self
._get
_login
_info
() 
 332         # No authentication to be performed 
 336         data 
= self
._download
_json
( 
 337             'http://kylin.iqiyi.com/get_token', None, 
 338             note
='Get token for logging', errnote
='Unable to get token for logging') 
 340         timestamp 
= int(time
.time()) 
 341         target 
= '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % ( 
 342             username
, self
._rsa
_fun
(password
.encode('utf-8'))) 
 344         interp 
= IqiyiSDKInterpreter(sdk
) 
 345         sign 
= interp
.run(target
, data
['ip'], timestamp
) 
 347         validation_params 
= { 
 349             'server': 'BEA3AA1908656AABCCFF76582C4C6660', 
 350             'token': data
['token'], 
 351             'bird_src': 'f8d91d57af224da7893dd397d52d811a', 
 355         validation_result 
= self
._download
_json
( 
 356             'http://kylin.iqiyi.com/validate?' + compat_urllib_parse
.urlencode(validation_params
), None, 
 357             note
='Validate credentials', errnote
='Unable to validate credentials') 
 360             'P00107': 'please login via the web interface and enter the CAPTCHA code', 
 361             'P00117': 'bad username or password', 
 364         code 
= validation_result
['code'] 
 366             msg 
= MSG_MAP
.get(code
) 
 368                 msg 
= 'error %s' % code
 
 369                 if validation_result
.get('msg'): 
 370                     msg 
+= ': ' + validation_result
['msg'] 
 371             self
._downloader
.report_warning('unable to log in: ' + msg
) 
 376     def _authenticate_vip_video(self
, api_video_url
, video_id
, tvid
, _uuid
, do_report_warning
): 
 378             # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as 
 380             'platform': 'b6c13e26323c537d', 
 385             'playType': 'main',  # XXX: always main? 
 386             'filename': os
.path
.splitext(url_basename(api_video_url
))[0], 
 389         qd_items 
= compat_parse_qs(compat_urllib_parse_urlparse(api_video_url
).query
) 
 390         for key
, val 
in qd_items
.items(): 
 391             auth_params
[key
] = val
[0] 
 393         auth_req 
= sanitized_Request( 
 394             'http://api.vip.iqiyi.com/services/ckn.action', 
 395             urlencode_postdata(auth_params
)) 
 396         # iQiyi server throws HTTP 405 error without the following header 
 397         auth_req
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 398         auth_result 
= self
._download
_json
( 
 400             note
='Downloading video authentication JSON', 
 401             errnote
='Unable to download video authentication JSON') 
 402         if auth_result
['code'] == 'Q00506':  # requires a VIP account 
 403             if do_report_warning
: 
 404                 self
.report_warning('Needs a VIP account for full video') 
 409     def construct_video_urls(self
, data
, video_id
, _uuid
, tvid
): 
 418         def get_encode_code(l
): 
 423             for i 
in range(c 
- 1, -1, -1): 
 424                 a 
= do_xor(int(b
[c 
- i 
- 1], 16), i
) 
 428         def get_path_key(x
, format_id
, segment_index
): 
 429             mg 
= ')(*&^flash@#$%a' 
 430             tm 
= self
._download
_json
( 
 431                 'http://data.video.qiyi.com/t?tn=' + str(random
.random()), video_id
, 
 432                 note
='Download path key of segment %d for format %s' % (segment_index 
+ 1, format_id
) 
 434             t 
= str(int(math
.floor(int(tm
) / (600.0)))) 
 435             return md5_text(t 
+ mg 
+ x
) 
 438         need_vip_warning_report 
= True 
 439         for format_item 
in data
['vp']['tkl'][0]['vs']: 
 440             if 0 < int(format_item
['bid']) <= 10: 
 441                 format_id 
= self
.get_format(format_item
['bid']) 
 447             video_urls_info 
= format_item
['fs'] 
 448             if not format_item
['fs'][0]['l'].startswith('/'): 
 449                 t 
= get_encode_code(format_item
['fs'][0]['l']) 
 450                 if t
.endswith('mp4'): 
 451                     video_urls_info 
= format_item
['flvs'] 
 453             for segment_index
, segment 
in enumerate(video_urls_info
): 
 455                 if not vl
.startswith('/'): 
 456                     vl 
= get_encode_code(vl
) 
 457                 is_vip_video 
= '/vip/' in vl
 
 458                 filesize 
= segment
['b'] 
 459                 base_url 
= data
['vp']['du'].split('/') 
 462                         vl
.split('/')[-1].split('.')[0], format_id
, segment_index
) 
 463                     base_url
.insert(-1, key
) 
 464                 base_url 
= '/'.join(base_url
) 
 467                     'qyid': uuid
.uuid4().hex, 
 472                     'tn': str(int(time
.time())) 
 474                 api_video_url 
= base_url 
+ vl
 
 476                     api_video_url 
= api_video_url
.replace('.f4v', '.hml') 
 477                     auth_result 
= self
._authenticate
_vip
_video
( 
 478                         api_video_url
, video_id
, tvid
, _uuid
, need_vip_warning_report
) 
 479                     if auth_result 
is False: 
 480                         need_vip_warning_report 
= False 
 483                         't': auth_result
['data']['t'], 
 484                         # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as 
 485                         'cid': 'afbe8fd3d73448c9', 
 487                         'QY00001': auth_result
['data']['u'], 
 489                 api_video_url 
+= '?' if '?' not in api_video_url 
else '&' 
 490                 api_video_url 
+= compat_urllib_parse
.urlencode(param
) 
 491                 js 
= self
._download
_json
( 
 492                     api_video_url
, video_id
, 
 493                     note
='Download video info of segment %d for format %s' % (segment_index 
+ 1, format_id
)) 
 496                     (video_url
, filesize
)) 
 498             video_urls_dict
[format_id
] = video_urls
 
 499         return video_urls_dict
 
 501     def get_format(self
, bid
): 
 502         matched_format_ids 
= [_format_id 
for _bid
, _format_id 
in self
._FORMATS
_MAP 
if _bid 
== str(bid
)] 
 503         return matched_format_ids
[0] if len(matched_format_ids
) else None 
 505     def get_bid(self
, format_id
): 
 506         matched_bids 
= [_bid 
for _bid
, _format_id 
in self
._FORMATS
_MAP 
if _format_id 
== format_id
] 
 507         return matched_bids
[0] if len(matched_bids
) else None 
 509     def get_raw_data(self
, tvid
, video_id
, enc_key
, _uuid
): 
 510         tm 
= str(int(time
.time())) 
 514             'src': md5_text('youtube-dl'), 
 519             'enc': md5_text(enc_key 
+ tail
), 
 521             'tn': random
.random(), 
 523             'authkey': md5_text(md5_text('') + tail
), 
 527         api_url 
= 'http://cache.video.qiyi.com/vms' + '?' + \
 
 528             compat_urllib_parse
.urlencode(param
) 
 529         raw_data 
= self
._download
_json
(api_url
, video_id
) 
 532     def get_enc_key(self
, swf_url
, video_id
): 
 533         # TODO: automatic key extraction 
 534         # last update at 2016-01-22 for Zombie::bite 
 535         enc_key 
= '6ab6d0280511493ba85594779759d4ed' 
 538     def _extract_playlist(self
, webpage
): 
 542             r
'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"', 
 547         album_id 
= self
._search
_regex
( 
 548             r
'albumId\s*:\s*(\d+),', webpage
, 'album ID') 
 549         album_title 
= self
._search
_regex
( 
 550             r
'data-share-title="([^"]+)"', webpage
, 'album title', fatal
=False) 
 552         entries 
= list(map(self
.url_result
, links
)) 
 554         # Start from 2 because links in the first page are already on webpage 
 555         for page_num 
in itertools
.count(2): 
 556             pagelist_page 
= self
._download
_webpage
( 
 557                 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id
, page_num
, PAGE_SIZE
), 
 559                 note
='Download playlist page %d' % page_num
, 
 560                 errnote
='Failed to download playlist page %d' % page_num
) 
 561             pagelist 
= self
._parse
_json
( 
 562                 remove_start(pagelist_page
, 'var tvInfoJs='), album_id
) 
 563             vlist 
= pagelist
['data']['vlist'] 
 565                 entries
.append(self
.url_result(item
['vurl'])) 
 566             if len(vlist
) < PAGE_SIZE
: 
 569         return self
.playlist_result(entries
, album_id
, album_title
) 
 571     def _real_extract(self
, url
): 
 572         webpage 
= self
._download
_webpage
( 
 573             url
, 'temp_id', note
='download video page') 
 575         # There's no simple way to determine whether an URL is a playlist or not 
 577         playlist_result 
= self
._extract
_playlist
(webpage
) 
 579             return playlist_result
 
 581         tvid 
= self
._search
_regex
( 
 582             r
'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') 
 583         video_id = self._search_regex( 
 584             r'data-player-videoid\s*=\s*[\'"]([a
-f\d
]+)', webpage, 'video_id
') 
 585         swf_url = self._search_regex( 
 586             r'(http
://[^
\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL') 
 587         _uuid = uuid.uuid4().hex 
 589         enc_key = self.get_enc_key(swf_url, video_id) 
 591         raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) 
 593         if raw_data['code'] != 'A000000': 
 594             raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) 
 596         data = raw_data['data'] 
 598         title = data['vi']['vn'] 
 600         # generate video_urls_dict 
 601         video_urls_dict = self.construct_video_urls( 
 602             data, video_id, _uuid, tvid) 
 606         for format_id in video_urls_dict: 
 607             video_urls = video_urls_dict[format_id] 
 608             for i, video_url_info in enumerate(video_urls): 
 609                 if len(entries) < i + 1: 
 610                     entries.append({'formats': []}) 
 611                 entries[i]['formats'].append( 
 613                         'url': video_url_info[0], 
 614                         'filesize': video_url_info[-1], 
 615                         'format_id': format_id, 
 616                         'preference': int(self.get_bid(format_id)) 
 620         for i in range(len(entries)): 
 621             self._sort_formats(entries[i]['formats']) 
 624                     'id': '%s_part%d' % (video_id, i + 1), 
 631                 '_type': 'multi_video', 
 638             info['id'] = video_id 
 639             info['title'] = title