]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/daisuki.py
   1 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  28 class DaisukiIE(InfoExtractor
): 
  29     _VALID_URL 
= r
'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html' 
  32         'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html', 
  36             'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS', 
  42             'creator': 'BANDAI NAMCO Entertainment', 
  45             'skip_download': True,  # AES-encrypted HLS stream 
  49     # The public key in PEM format can be found in clientlibs_anime_watch.min.js 
  50     _RSA_KEY 
= (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537) 
  52     def _real_extract(self
, url
): 
  53         video_id 
= self
._match
_id
(url
) 
  55         webpage 
= self
._download
_webpage
(url
, video_id
) 
  57         flashvars 
= self
._parse
_json
(self
._search
_regex
( 
  58             r
'(?s)var\s+flashvars\s*=\s*({.+?});', webpage
, 'flashvars'), 
  59             video_id
, transform_source
=js_to_json
) 
  64         for key 
in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'): 
  65             data
[key
] = flashvars
.get(key
, '') 
  69         # Some AES keys are rejected. Try it with different AES keys 
  71             aes_key 
= [random
.randint(0, 254) for _ 
in range(32)] 
  72             padded_aeskey 
= intlist_to_bytes(pkcs1pad(aes_key
, 128)) 
  75             encrypted_aeskey 
= long_to_bytes(pow(bytes_to_long(padded_aeskey
), e
, n
)) 
  76             init_data 
= self
._download
_json
('http://www.daisuki.net/bin/bgn/init', video_id
, query
={ 
  77                 's': flashvars
.get('s', ''), 
  78                 'c': flashvars
.get('ss3_prm', ''), 
  80                 'd': base64
.b64encode(intlist_to_bytes(aes_cbc_encrypt( 
  81                     bytes_to_intlist(json
.dumps(data
)), 
  82                     aes_key
, iv
))).decode('ascii'), 
  83                 'a': base64
.b64encode(encrypted_aeskey
).decode('ascii'), 
  84             }, note
='Downloading JSON metadata' + (' (try #%d)' % (idx 
+ 1) if idx 
> 0 else '')) 
  86             if 'rtn' in init_data
: 
  87                 encrypted_rtn 
= init_data
['rtn'] 
  90             self
._sleep
(5, video_id
) 
  92         if encrypted_rtn 
is None: 
  93             raise ExtractorError('Failed to fetch init data') 
  95         rtn 
= self
._parse
_json
( 
  96             intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( 
  97                 base64
.b64decode(encrypted_rtn
)), 
  98                 aes_key
, iv
)).decode('utf-8').rstrip('\0'), 
 101         formats 
= self
._extract
_m
3u8_formats
( 
 102             rtn
['play_url'], video_id
, ext
='mp4', entry_protocol
='m3u8_native') 
 104         title 
= remove_end(self
._og
_search
_title
(webpage
), ' - DAISUKI') 
 106         creator 
= self
._html
_search
_regex
( 
 107             r
'Creator\s*:\s*([^<]+)', webpage
, 'creator', fatal
=False) 
 110         caption_url 
= rtn
.get('caption_url') 
 112             # mul: multiple languages 
 113             subtitles
['mul'] = [{ 
 122             'subtitles': subtitles
, 
 127 class DaisukiPlaylistIE(InfoExtractor
): 
 128     _VALID_URL 
= r
'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html' 
 131         'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html', 
 133             'id': 'TheIdolMasterCG', 
 134             'title': 'THE IDOLM@STER CINDERELLA GIRLS', 
 135             'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8', 
 137         'playlist_count': 26, 
 140     def _real_extract(self
, url
): 
 141         playlist_id 
= self
._match
_id
(url
) 
 143         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 145         episode_pattern 
= r
'''(?sx) 
 146             <img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+? 
 147             <p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)''' 
 149             '_type': 'url_transparent', 
 150             'url': url
.replace('detail', 'watch').replace('.html', '.' + movie_id 
+ '.html'), 
 151             'episode_id': episode_id
, 
 152             'episode_number': int_or_none(episode_id
), 
 153         } for movie_id
, episode_id 
in re
.findall(episode_pattern
, webpage
)] 
 155         playlist_title 
= remove_end( 
 156             self
._og
_search
_title
(webpage
, fatal
=False), ' - Anime - DAISUKI') 
 157         playlist_description 
= clean_html(get_element_by_id('synopsisTxt', webpage
)) 
 159         return self
.playlist_result(entries
, playlist_id
, playlist_title
, playlist_description
)