2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  10 from ..compat 
import ( 
  14     compat_urllib_parse_urlencode
, 
  29 class LeIE(InfoExtractor
): 
  31     _VALID_URL 
= r
'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html' 
  32     _GEO_COUNTRIES 
= ['CN'] 
  33     _URL_TEMPLATE 
= 'http://www.le.com/ptv/vplay/%s.html' 
  36         'url': 'http://www.le.com/ptv/vplay/22005890.html', 
  37         'md5': 'edadcfe5406976f42f9f266057ee5e40', 
  41             'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家', 
  42             'description': 'md5:a9cb175fd753e2962176b7beca21a47c', 
  45             'hls_prefer_native': True, 
  48         'url': 'http://www.le.com/ptv/vplay/1415246.html', 
  53             'description': 'md5:28942e650e82ed4fcc8e4de919ee854d', 
  56             'hls_prefer_native': True, 
  59         'note': 'This video is available only in Mainland China, thus a proxy is needed', 
  60         'url': 'http://www.le.com/ptv/vplay/1118082.html', 
  61         'md5': '2424c74948a62e5f31988438979c5ad1', 
  66             'description': 'md5:7506a5eeb1722bb9d4068f85024e3986', 
  69             'hls_prefer_native': True, 
  72         'url': 'http://sports.le.com/video/25737697.html', 
  73         'only_matching': True, 
  75         'url': 'http://www.lesports.com/match/1023203003.html', 
  76         'only_matching': True, 
  78         'url': 'http://sports.le.com/match/1023203003.html', 
  79         'only_matching': True, 
  82     # ror() and calc_time_key() are reverse-engineered from an embedded swf file in LetvPlayer.swf 
  83     def ror(self
, param1
, param2
): 
  85         while _loc3_ 
< param2
: 
  86             param1 
= urshift(param1
, 1) + ((param1 
& 1) << 31) 
  90     def calc_time_key(self
, param1
): 
  92         return self
.ror(param1
, _loc2_ 
% 17) ^ _loc2_
 
  94     # see M3U8Encryption class in KLetvPlayer.swf 
  96     def decrypt_m3u8(encrypted_data
): 
  97         if encrypted_data
[:5].decode('utf-8').lower() != 'vc_01': 
  99         encrypted_data 
= encrypted_data
[5:] 
 101         _loc4_ 
= bytearray(2 * len(encrypted_data
)) 
 102         for idx
, val 
in enumerate(encrypted_data
): 
 104             _loc4_
[2 * idx
] = b 
// 16 
 105             _loc4_
[2 * idx 
+ 1] = b 
% 16 
 106         idx 
= len(_loc4_
) - 11 
 107         _loc4_ 
= _loc4_
[idx
:] + _loc4_
[:idx
] 
 108         _loc7_ 
= bytearray(len(encrypted_data
)) 
 109         for i 
in range(len(encrypted_data
)): 
 110             _loc7_
[i
] = _loc4_
[2 * i
] * 16 + _loc4_
[2 * i 
+ 1] 
 114     def _check_errors(self
, play_json
): 
 116         playstatus 
= play_json
['msgs']['playstatus'] 
 117         if playstatus
['status'] == 0: 
 118             flag 
= playstatus
['flag'] 
 120                 self
.raise_geo_restricted() 
 122                 raise ExtractorError('Generic error. flag = %d' % flag
, expected
=True) 
 124     def _real_extract(self
, url
): 
 125         media_id 
= self
._match
_id
(url
) 
 126         page 
= self
._download
_webpage
(url
, media_id
) 
 128         play_json_flash 
= self
._download
_json
( 
 129             'http://player-pc.le.com/mms/out/video/playJson', 
 130             media_id
, 'Downloading flash playJson data', query
={ 
 136                 'tkey': self
.calc_time_key(int(time
.time())), 
 137                 'domain': 'www.le.com', 
 140             headers
=self
.geo_verification_headers()) 
 141         self
._check
_errors
(play_json_flash
) 
 143         def get_flash_urls(media_url
, format_id
): 
 144             nodes_data 
= self
._download
_json
( 
 146                 'Download JSON metadata for format %s' % format_id
, 
 154             req 
= self
._request
_webpage
( 
 155                 nodes_data
['nodelist'][0]['location'], media_id
, 
 156                 note
='Downloading m3u8 information for format %s' % format_id
) 
 158             m3u8_data 
= self
.decrypt_m3u8(req
.read()) 
 161                 'hls': encode_data_uri(m3u8_data
, 'application/vnd.apple.mpegurl'), 
 164         extracted_formats 
= [] 
 166         playurl 
= play_json_flash
['msgs']['playurl'] 
 167         play_domain 
= playurl
['domain'][0] 
 169         for format_id
, format_data 
in playurl
.get('dispatch', []).items(): 
 170             if format_id 
in extracted_formats
: 
 172             extracted_formats
.append(format_id
) 
 174             media_url 
= play_domain 
+ format_data
[0] 
 175             for protocol
, format_url 
in get_flash_urls(media_url
, format_id
).items(): 
 178                     'ext': determine_ext(format_data
[1]), 
 179                     'format_id': '%s-%s' % (protocol
, format_id
), 
 180                     'protocol': 'm3u8_native' if protocol 
== 'hls' else 'http', 
 181                     'quality': int_or_none(format_id
), 
 184                 if format_id
[-1:] == 'p': 
 185                     f
['height'] = int_or_none(format_id
[:-1]) 
 188         self
._sort
_formats
(formats
, ('height', 'quality', 'format_id')) 
 190         publish_time 
= parse_iso8601(self
._html
_search
_regex
( 
 191             r
'发布时间 ([^<>]+) ', page
, 'publish time', default
=None), 
 192             delimiter
=' ', timezone
=datetime
.timedelta(hours
=8)) 
 193         description 
= self
._html
_search
_meta
('description', page
, fatal
=False) 
 198             'title': playurl
['title'], 
 199             'thumbnail': playurl
['pic'], 
 200             'description': description
, 
 201             'timestamp': publish_time
, 
 205 class LePlaylistIE(InfoExtractor
): 
 206     _VALID_URL 
= r
'https?://[a-z]+\.le\.com/(?!video)[a-z]+/(?P<id>[a-z0-9_]+)' 
 209         'url': 'http://www.le.com/tv/46177.html', 
 213             'description': 'md5:395666ff41b44080396e59570dbac01c' 
 217         'url': 'http://tv.le.com/izt/wuzetian/index.html', 
 221             'description': 'md5:e12499475ab3d50219e5bba00b3cb248' 
 223         # This playlist contains some extra videos other than the drama itself 
 224         'playlist_mincount': 96 
 226         'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml', 
 227         # This series is moved to http://www.le.com/tv/10005297.html 
 228         'only_matching': True, 
 230         'url': 'http://www.le.com/comic/92063.html', 
 231         'only_matching': True, 
 233         'url': 'http://list.le.com/listn/c1009_sc532002_d2_p1_o1.html', 
 234         'only_matching': True, 
 238     def suitable(cls
, url
): 
 239         return False if LeIE
.suitable(url
) else super(LePlaylistIE
, cls
).suitable(url
) 
 241     def _real_extract(self
, url
): 
 242         playlist_id 
= self
._match
_id
(url
) 
 243         page 
= self
._download
_webpage
(url
, playlist_id
) 
 245         # Currently old domain names are still used in playlists 
 246         media_ids 
= orderedSet(re
.findall( 
 247             r
'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page
)) 
 248         entries 
= [self
.url_result(LeIE
._URL
_TEMPLATE 
% media_id
, ie
='Le') 
 249                    for media_id 
in media_ids
] 
 251         title 
= self
._html
_search
_meta
('keywords', page
, 
 252                                        fatal
=False).split(',')[0] 
 253         description 
= self
._html
_search
_meta
('description', page
, fatal
=False) 
 255         return self
.playlist_result(entries
, playlist_id
, playlist_title
=title
, 
 256                                     playlist_description
=description
) 
 259 class LetvCloudIE(InfoExtractor
): 
 260     # Most of *.letv.com was changed to *.le.com on 2016/01/02 
 261     # but yuntv.letv.com was kept, so the extractor name is kept as well 
 263     _VALID_URL 
= r
'https?://yuntv\.letv\.com/bcloud.html\?.+' 
 266         'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf', 
 267         'md5': '26450599afd64c513bc77030ad15db44', 
 269             'id': 'p7jnfw5hw9_467623dedf', 
 271             'title': 'Video p7jnfw5hw9_467623dedf', 
 274         'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360', 
 275         'md5': 'e03d9cc8d9c13191e1caf277e42dbd31', 
 277             'id': 'p7jnfw5hw9_ec93197892', 
 279             'title': 'Video p7jnfw5hw9_ec93197892', 
 282         'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd', 
 283         'md5': 'cb988699a776b22d4a41b9d43acfb3ac', 
 285             'id': 'p7jnfw5hw9_187060b6fd', 
 287             'title': 'Video p7jnfw5hw9_187060b6fd', 
 293         if obj
['cf'] == 'flash': 
 294             salt 
= '2f9d6924b33a165a6d8b5d3d42f4f987' 
 295             items 
= ['cf', 'format', 'ran', 'uu', 'ver', 'vu'] 
 296         elif obj
['cf'] == 'html5': 
 297             salt 
= 'fbeh5player12c43eccf2bec3300344' 
 298             items 
= ['cf', 'ran', 'uu', 'bver', 'vu'] 
 299         input_data 
= ''.join([item 
+ obj
[item
] for item 
in items
]) + salt
 
 300         obj
['sign'] = hashlib
.md5(input_data
.encode('utf-8')).hexdigest() 
 302     def _get_formats(self
, cf
, uu
, vu
, media_id
): 
 303         def get_play_json(cf
, timestamp
): 
 307                 'bver': 'firefox44.0', 
 311                 'ran': compat_str(timestamp
), 
 314             return self
._download
_json
( 
 315                 'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse_urlencode(data
), 
 316                 media_id
, 'Downloading playJson data for type %s' % cf
) 
 318         play_json 
= get_play_json(cf
, time
.time()) 
 319         # The server time may be different from local time 
 320         if play_json
.get('code') == 10071: 
 321             play_json 
= get_play_json(cf
, play_json
['timestamp']) 
 323         if not play_json
.get('data'): 
 324             if play_json
.get('message'): 
 325                 raise ExtractorError('Letv cloud said: %s' % play_json
['message'], expected
=True) 
 326             elif play_json
.get('code'): 
 327                 raise ExtractorError('Letv cloud returned error %d' % play_json
['code'], expected
=True) 
 329                 raise ExtractorError('Letv cloud returned an unknown error') 
 332             return compat_b64decode(s
).decode('utf-8') 
 335         for media 
in play_json
['data']['video_info']['media'].values(): 
 336             play_url 
= media
['play_url'] 
 337             url 
= b64decode(play_url
['main_url']) 
 338             decoded_url 
= b64decode(url_basename(url
)) 
 341                 'ext': determine_ext(decoded_url
), 
 342                 'format_id': str_or_none(play_url
.get('vtype')), 
 343                 'format_note': str_or_none(play_url
.get('definition')), 
 344                 'width': int_or_none(play_url
.get('vwidth')), 
 345                 'height': int_or_none(play_url
.get('vheight')), 
 350     def _real_extract(self
, url
): 
 351         uu_mobj 
= re
.search(r
'uu=([\w]+)', url
) 
 352         vu_mobj 
= re
.search(r
'vu=([\w]+)', url
) 
 354         if not uu_mobj 
or not vu_mobj
: 
 355             raise ExtractorError('Invalid URL: %s' % url
, expected
=True) 
 357         uu 
= uu_mobj
.group(1) 
 358         vu 
= vu_mobj
.group(1) 
 359         media_id 
= uu 
+ '_' + vu
 
 361         formats 
= self
._get
_formats
('flash', uu
, vu
, media_id
) + self
._get
_formats
('html5', uu
, vu
, media_id
) 
 362         self
._sort
_formats
(formats
) 
 366             'title': 'Video %s' % media_id
,