]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kuwo.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  16 class KuwoBaseIE(InfoExtractor
): 
  18         {'format': 'ape', 'ext': 'ape', 'preference': 100}, 
  19         {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80}, 
  20         {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70}, 
  21         {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60}, 
  22         {'format': 'wma', 'ext': 'wma', 'preference': 20}, 
  23         {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10} 
  26     def _get_formats(self
, song_id
): 
  28         for file_format 
in self
._FORMATS
: 
  29             song_url 
= self
._download
_webpage
( 
  30                 'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' % 
  31                 (file_format
['ext'], file_format
.get('br', ''), song_id
), 
  32                 song_id
, note
='Download %s url info' % file_format
['format'], 
  35             if song_url 
== 'IPDeny': 
  36                 raise ExtractorError('This song is blocked in this region', expected
=True) 
  38             if song_url
.startswith('http://') or song_url
.startswith('https://'): 
  41                     'format_id': file_format
['format'], 
  42                     'format': file_format
['format'], 
  43                     'preference': file_format
['preference'], 
  44                     'abr': file_format
.get('abr'), 
  46         self
._sort
_formats
(formats
) 
  50 class KuwoIE(KuwoBaseIE
): 
  53     _VALID_URL 
= r
'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/' 
  55         'url': 'http://www.kuwo.cn/yinyue/635632/', 
  61             'upload_date': '20080122', 
  62             'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c' 
  64         'skip': 'this song has been offline because of copyright issues', 
  66         'url': 'http://www.kuwo.cn/yinyue/6446136/', 
  72             'upload_date': '20150518', 
  79     def _real_extract(self
, url
): 
  80         song_id 
= self
._match
_id
(url
) 
  81         webpage 
= self
._download
_webpage
( 
  82             url
, song_id
, note
='Download song detail info', 
  83             errnote
='Unable to get song detail info') 
  84         if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage
: 
  85             raise ExtractorError('this song has been offline because of copyright issues', expected
=True) 
  87         song_name 
= self
._html
_search
_regex
( 
  88             r
'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage
, 'song name') 
  89         singer_name 
= self
._html
_search
_regex
( 
  90             r
'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"', 
  91             webpage
, 'singer name', fatal
=False) 
  92         lrc_content 
= clean_html(get_element_by_id('lrcContent', webpage
)) 
  93         if lrc_content 
== '暂无':     # indicates no lyrics 
  96         formats 
= self
._get
_formats
(song_id
) 
  98         album_id 
= self
._html
_search
_regex
( 
  99             r
'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"', 
 100             webpage
, 'album id', fatal
=False) 
 103         if album_id 
is not None: 
 104             album_info_page 
= self
._download
_webpage
( 
 105                 'http://www.kuwo.cn/album/%s/' % album_id
, song_id
, 
 106                 note
='Download album detail info', 
 107                 errnote
='Unable to get album detail info') 
 109             publish_time 
= self
._html
_search
_regex
( 
 110                 r
'发行时间:(\d{4}-\d{2}-\d{2})', album_info_page
, 
 111                 'publish time', fatal
=False) 
 113                 publish_time 
= publish_time
.replace('-', '') 
 118             'creator': singer_name
, 
 119             'upload_date': publish_time
, 
 120             'description': lrc_content
, 
 125 class KuwoAlbumIE(InfoExtractor
): 
 126     IE_NAME 
= 'kuwo:album' 
 127     IE_DESC 
= '酷我音乐 - 专辑' 
 128     _VALID_URL 
= r
'http://www\.kuwo\.cn/album/(?P<id>\d+?)/' 
 130         'url': 'http://www.kuwo.cn/album/502294/', 
 134             'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c', 
 139     def _real_extract(self
, url
): 
 140         album_id 
= self
._match
_id
(url
) 
 142         webpage 
= self
._download
_webpage
( 
 143             url
, album_id
, note
='Download album info', 
 144             errnote
='Unable to get album info') 
 146         album_name 
= self
._html
_search
_regex
( 
 147             r
'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', webpage
, 
 149         album_intro 
= remove_start( 
 150             clean_html(get_element_by_id('intro', webpage
)), 
 151             '%s简介:' % album_name
) 
 154             self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 155                 r
'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"', 
 158         return self
.playlist_result(entries
, album_id
, album_name
, album_intro
) 
 161 class KuwoChartIE(InfoExtractor
): 
 162     IE_NAME 
= 'kuwo:chart' 
 163     IE_DESC 
= '酷我音乐 - 排行榜' 
 164     _VALID_URL 
= r
'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm' 
 166         'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm', 
 170             'description': 're:\d{4}第\d{2}期', 
 172         'playlist_mincount': 10, 
 175     def _real_extract(self
, url
): 
 176         chart_id 
= self
._match
_id
(url
) 
 177         webpage 
= self
._download
_webpage
( 
 178             url
, chart_id
, note
='Download chart info', 
 179             errnote
='Unable to get chart info') 
 181         chart_name 
= self
._html
_search
_regex
( 
 182             r
'<h1[^>]+class="unDis">([^<]+)</h1>', webpage
, 'chart name') 
 184         chart_desc 
= self
._html
_search
_regex
( 
 185             r
'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage
, 'chart desc') 
 188             self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 189                 r
'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage
) 
 191         return self
.playlist_result(entries
, chart_id
, chart_name
, chart_desc
) 
 194 class KuwoSingerIE(InfoExtractor
): 
 195     IE_NAME 
= 'kuwo:singer' 
 196     IE_DESC 
= '酷我音乐 - 歌手' 
 197     _VALID_URL 
= r
'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' 
 199         'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 
 202             'title': 'Bruno Mars', 
 204         'playlist_count': 10, 
 206         'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm', 
 211         'playlist_mincount': 95, 
 212         'skip': 'Regularly stalls travis build',  # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540 
 215     def _real_extract(self
, url
): 
 216         singer_id 
= self
._match
_id
(url
) 
 217         webpage 
= self
._download
_webpage
( 
 218             url
, singer_id
, note
='Download singer info', 
 219             errnote
='Unable to get singer info') 
 221         singer_name 
= self
._html
_search
_regex
( 
 222             r
'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage
, 'singer name' 
 226         first_page_only 
= False if re
.search(r
'/music(?:_\d+)?\.htm', url
) else True 
 227         for page_num 
in itertools
.count(1): 
 228             webpage 
= self
._download
_webpage
( 
 229                 'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id
, page_num
), 
 230                 singer_id
, note
='Download song list page #%d' % page_num
, 
 231                 errnote
='Unable to get song list page #%d' % page_num
) 
 234                 self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 235                     r
'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/', 
 237             ][:10 if first_page_only 
else None]) 
 239             if first_page_only 
or not re
.search(r
'<a[^>]+href="[^"]+">下一页</a>', webpage
): 
 242         return self
.playlist_result(entries
, singer_id
, singer_name
) 
 245 class KuwoCategoryIE(InfoExtractor
): 
 246     IE_NAME 
= 'kuwo:category' 
 247     IE_DESC 
= '酷我音乐 - 分类' 
 248     _VALID_URL 
= r
'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm' 
 250         'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm', 
 254             'description': '这些都是属于八十年代的回忆!', 
 256         'playlist_count': 30, 
 259     def _real_extract(self
, url
): 
 260         category_id 
= self
._match
_id
(url
) 
 261         webpage 
= self
._download
_webpage
( 
 262             url
, category_id
, note
='Download category info', 
 263             errnote
='Unable to get category info') 
 265         category_name 
= self
._html
_search
_regex
( 
 266             r
'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage
, 'category name') 
 268         category_desc 
= remove_start( 
 269             get_element_by_id('intro', webpage
).strip(), 
 270             '%s简介:' % category_name
) 
 272         jsonm 
= self
._parse
_json
(self
._html
_search
_regex
( 
 273             r
'var\s+jsonm\s*=\s*([^;]+);', webpage
, 'category songs'), category_id
) 
 276             self
.url_result('http://www.kuwo.cn/yinyue/%s/' % song
['musicrid'], 'Kuwo') 
 277             for song 
in jsonm
['musiclist'] 
 279         return self
.playlist_result(entries
, category_id
, category_name
, category_desc
) 
 282 class KuwoMvIE(KuwoBaseIE
): 
 284     IE_DESC 
= '酷我音乐 - MV' 
 285     _VALID_URL 
= r
'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/' 
 287         'url': 'http://www.kuwo.cn/mv/6480076/', 
 295     _FORMATS 
= KuwoBaseIE
._FORMATS 
+ [ 
 296         {'format': 'mkv', 'ext': 'mkv', 'preference': 250}, 
 297         {'format': 'mp4', 'ext': 'mp4', 'preference': 200}, 
 300     def _real_extract(self
, url
): 
 301         song_id 
= self
._match
_id
(url
) 
 302         webpage 
= self
._download
_webpage
( 
 303             url
, song_id
, note
='Download mv detail info: %s' % song_id
, 
 304             errnote
='Unable to get mv detail info: %s' % song_id
) 
 307             r
'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"', 
 310             song_name 
= mobj
.group('song') 
 311             singer_name 
= mobj
.group('singer') 
 313             raise ExtractorError('Unable to find song or singer names') 
 315         formats 
= self
._get
_formats
(song_id
) 
 320             'creator': singer_name
,