]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kuwo.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  16 class KuwoBaseIE(InfoExtractor
): 
  18         {'format': 'ape', 'ext': 'ape', 'preference': 100}, 
  19         {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80}, 
  20         {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70}, 
  21         {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60}, 
  22         {'format': 'wma', 'ext': 'wma', 'preference': 20}, 
  23         {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10} 
  26     def _get_formats(self
, song_id
): 
  28         for file_format 
in self
._FORMATS
: 
  29             song_url 
= self
._download
_webpage
( 
  30                 'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' % 
  31                 (file_format
['ext'], file_format
.get('br', ''), song_id
), 
  32                 song_id
, note
='Download %s url info' % file_format
['format'], 
  34             if song_url
.startswith('http://') or song_url
.startswith('https://'): 
  37                     'format_id': file_format
['format'], 
  38                     'format': file_format
['format'], 
  39                     'preference': file_format
['preference'], 
  40                     'abr': file_format
.get('abr'), 
  42         self
._sort
_formats
(formats
) 
  46 class KuwoIE(KuwoBaseIE
): 
  49     _VALID_URL 
= r
'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/' 
  51         'url': 'http://www.kuwo.cn/yinyue/635632/', 
  57             'upload_date': '20080122', 
  58             'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c' 
  61         'url': 'http://www.kuwo.cn/yinyue/6446136/', 
  67             'upload_date': '20150518', 
  74     def _real_extract(self
, url
): 
  75         song_id 
= self
._match
_id
(url
) 
  76         webpage 
= self
._download
_webpage
( 
  77             url
, song_id
, note
='Download song detail info', 
  78             errnote
='Unable to get song detail info') 
  80         song_name 
= self
._html
_search
_regex
( 
  81             r
'<h1[^>]+title="([^"]+)">', webpage
, 'song name') 
  82         singer_name 
= self
._html
_search
_regex
( 
  83             r
'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"', 
  84             webpage
, 'singer name', fatal
=False) 
  85         lrc_content 
= clean_html(get_element_by_id('lrcContent', webpage
)) 
  86         if lrc_content 
== '暂无':     # indicates no lyrics 
  89         formats 
= self
._get
_formats
(song_id
) 
  91         album_id 
= self
._html
_search
_regex
( 
  92             r
'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"', 
  93             webpage
, 'album id', fatal
=False) 
  96         if album_id 
is not None: 
  97             album_info_page 
= self
._download
_webpage
( 
  98                 'http://www.kuwo.cn/album/%s/' % album_id
, song_id
, 
  99                 note
='Download album detail info', 
 100                 errnote
='Unable to get album detail info') 
 102             publish_time 
= self
._html
_search
_regex
( 
 103                 r
'发行时间:(\d{4}-\d{2}-\d{2})', album_info_page
, 
 104                 'publish time', fatal
=False) 
 106                 publish_time 
= publish_time
.replace('-', '') 
 111             'creator': singer_name
, 
 112             'upload_date': publish_time
, 
 113             'description': lrc_content
, 
 118 class KuwoAlbumIE(InfoExtractor
): 
 119     IE_NAME 
= 'kuwo:album' 
 120     IE_DESC 
= '酷我音乐 - 专辑' 
 121     _VALID_URL 
= r
'http://www\.kuwo\.cn/album/(?P<id>\d+?)/' 
 123         'url': 'http://www.kuwo.cn/album/502294/', 
 127             'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c', 
 132     def _real_extract(self
, url
): 
 133         album_id 
= self
._match
_id
(url
) 
 135         webpage 
= self
._download
_webpage
( 
 136             url
, album_id
, note
='Download album info', 
 137             errnote
='Unable to get album info') 
 139         album_name 
= self
._html
_search
_regex
( 
 140             r
'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', webpage
, 
 142         album_intro 
= remove_start( 
 143             clean_html(get_element_by_id('intro', webpage
)), 
 144             '%s简介:' % album_name
) 
 147             self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 148                 r
'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"', 
 151         return self
.playlist_result(entries
, album_id
, album_name
, album_intro
) 
 154 class KuwoChartIE(InfoExtractor
): 
 155     IE_NAME 
= 'kuwo:chart' 
 156     IE_DESC 
= '酷我音乐 - 排行榜' 
 157     _VALID_URL 
= r
'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm' 
 159         'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm', 
 163             'description': 're:\d{4}第\d{2}期', 
 165         'playlist_mincount': 10, 
 168     def _real_extract(self
, url
): 
 169         chart_id 
= self
._match
_id
(url
) 
 170         webpage 
= self
._download
_webpage
( 
 171             url
, chart_id
, note
='Download chart info', 
 172             errnote
='Unable to get chart info') 
 174         chart_name 
= self
._html
_search
_regex
( 
 175             r
'<h1[^>]+class="unDis">([^<]+)</h1>', webpage
, 'chart name') 
 177         chart_desc 
= self
._html
_search
_regex
( 
 178             r
'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage
, 'chart desc') 
 181             self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 182                 r
'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage
) 
 184         return self
.playlist_result(entries
, chart_id
, chart_name
, chart_desc
) 
 187 class KuwoSingerIE(InfoExtractor
): 
 188     IE_NAME 
= 'kuwo:singer' 
 189     IE_DESC 
= '酷我音乐 - 歌手' 
 190     _VALID_URL 
= r
'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' 
 192         'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 
 195             'title': 'Bruno Mars', 
 197         'playlist_count': 10, 
 199         'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm', 
 204         'playlist_mincount': 95, 
 207     def _real_extract(self
, url
): 
 208         singer_id 
= self
._match
_id
(url
) 
 209         webpage 
= self
._download
_webpage
( 
 210             url
, singer_id
, note
='Download singer info', 
 211             errnote
='Unable to get singer info') 
 213         singer_name 
= self
._html
_search
_regex
( 
 214             r
'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage
, 'singer name' 
 218         first_page_only 
= False if re
.search(r
'/music(?:_\d+)?\.htm', url
) else True 
 219         for page_num 
in itertools
.count(1): 
 220             webpage 
= self
._download
_webpage
( 
 221                 'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id
, page_num
), 
 222                 singer_id
, note
='Download song list page #%d' % page_num
, 
 223                 errnote
='Unable to get song list page #%d' % page_num
) 
 226                 self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 227                     r
'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/', 
 229             ][:10 if first_page_only 
else None]) 
 231             if first_page_only 
or not re
.search(r
'<a[^>]+href="[^"]+">下一页</a>', webpage
): 
 234         return self
.playlist_result(entries
, singer_id
, singer_name
) 
 237 class KuwoCategoryIE(InfoExtractor
): 
 238     IE_NAME 
= 'kuwo:category' 
 239     IE_DESC 
= '酷我音乐 - 分类' 
 240     _VALID_URL 
= r
'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm' 
 242         'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm', 
 246             'description': '这些都是属于八十年代的回忆!', 
 248         'playlist_count': 30, 
 251     def _real_extract(self
, url
): 
 252         category_id 
= self
._match
_id
(url
) 
 253         webpage 
= self
._download
_webpage
( 
 254             url
, category_id
, note
='Download category info', 
 255             errnote
='Unable to get category info') 
 257         category_name 
= self
._html
_search
_regex
( 
 258             r
'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage
, 'category name') 
 260         category_desc 
= remove_start( 
 261             get_element_by_id('intro', webpage
).strip(), 
 262             '%s简介:' % category_name
) 
 264         jsonm 
= self
._parse
_json
(self
._html
_search
_regex
( 
 265             r
'var\s+jsonm\s*=\s*([^;]+);', webpage
, 'category songs'), category_id
) 
 268             self
.url_result('http://www.kuwo.cn/yinyue/%s/' % song
['musicrid'], 'Kuwo') 
 269             for song 
in jsonm
['musiclist'] 
 271         return self
.playlist_result(entries
, category_id
, category_name
, category_desc
) 
 274 class KuwoMvIE(KuwoBaseIE
): 
 276     IE_DESC 
= '酷我音乐 - MV' 
 277     _VALID_URL 
= r
'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/' 
 279         'url': 'http://www.kuwo.cn/mv/6480076/', 
 287     _FORMATS 
= KuwoBaseIE
._FORMATS 
+ [ 
 288         {'format': 'mkv', 'ext': 'mkv', 'preference': 250}, 
 289         {'format': 'mp4', 'ext': 'mp4', 'preference': 200}, 
 292     def _real_extract(self
, url
): 
 293         song_id 
= self
._match
_id
(url
) 
 294         webpage 
= self
._download
_webpage
( 
 295             url
, song_id
, note
='Download mv detail info: %s' % song_id
, 
 296             errnote
='Unable to get mv detail info: %s' % song_id
) 
 299             r
'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"', 
 302             song_name 
= mobj
.group('song') 
 303             singer_name 
= mobj
.group('singer') 
 305             raise ExtractorError('Unable to find song or singer names') 
 307         formats 
= self
._get
_formats
(song_id
) 
 312             'creator': singer_name
,