]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kuwo.py
0221fb9191879cef1d57e42238b5d86737cdd621
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class KuwoBaseIE(InfoExtractor
): 
  18         {'format': 'ape', 'ext': 'ape', 'preference': 100}, 
  19         {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80}, 
  20         {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70}, 
  21         {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60}, 
  22         {'format': 'wma', 'ext': 'wma', 'preference': 20}, 
  23         {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10} 
  26     def _get_formats(self
, song_id
, tolerate_ip_deny
=False): 
  28         for file_format 
in self
._FORMATS
: 
  30             cn_verification_proxy 
= self
._downloader
.params
.get('cn_verification_proxy') 
  31             if cn_verification_proxy
: 
  32                 headers
['Ytdl-request-proxy'] = cn_verification_proxy
 
  35                 'format': file_format
['ext'], 
  36                 'br': file_format
.get('br', ''), 
  37                 'rid': 'MUSIC_%s' % song_id
, 
  38                 'type': 'convert_url', 
  42             song_url 
= self
._download
_webpage
( 
  43                 'http://antiserver.kuwo.cn/anti.s', 
  44                 song_id
, note
='Download %s url info' % file_format
['format'], 
  45                 query
=query
, headers
=headers
, 
  48             if song_url 
== 'IPDeny' and not tolerate_ip_deny
: 
  49                 raise ExtractorError('This song is blocked in this region', expected
=True) 
  51             if song_url
.startswith('http://') or song_url
.startswith('https://'): 
  54                     'format_id': file_format
['format'], 
  55                     'format': file_format
['format'], 
  56                     'preference': file_format
['preference'], 
  57                     'abr': file_format
.get('abr'), 
  63 class KuwoIE(KuwoBaseIE
): 
  66     _VALID_URL 
= r
'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)' 
  68         'url': 'http://www.kuwo.cn/yinyue/635632/', 
  74             'upload_date': '20080122', 
  75             'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c' 
  77         'skip': 'this song has been offline because of copyright issues', 
  79         'url': 'http://www.kuwo.cn/yinyue/6446136/', 
  84             'description': 'md5:5d0e947b242c35dc0eb1d2fce9fbf02c', 
  86             'upload_date': '20150518', 
  92         'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016', 
  93         'only_matching': True, 
  96     def _real_extract(self
, url
): 
  97         song_id 
= self
._match
_id
(url
) 
  98         webpage 
= self
._download
_webpage
( 
  99             url
, song_id
, note
='Download song detail info', 
 100             errnote
='Unable to get song detail info') 
 101         if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage
: 
 102             raise ExtractorError('this song has been offline because of copyright issues', expected
=True) 
 104         song_name 
= self
._html
_search
_regex
( 
 105             r
'<p[^>]+id="lrcName">([^<]+)</p>', webpage
, 'song name') 
 106         singer_name 
= remove_start(self
._html
_search
_regex
( 
 107             r
'<a[^>]+href="http://www\.kuwo\.cn/artist/content\?name=([^"]+)">', 
 108             webpage
, 'singer name', fatal
=False), '歌手') 
 109         lrc_content 
= clean_html(get_element_by_id('lrcContent', webpage
)) 
 110         if lrc_content 
== '暂无':     # indicates no lyrics 
 113         formats 
= self
._get
_formats
(song_id
) 
 114         self
._sort
_formats
(formats
) 
 116         album_id 
= self
._html
_search
_regex
( 
 117             r
'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"', 
 118             webpage
, 'album id', fatal
=False) 
 121         if album_id 
is not None: 
 122             album_info_page 
= self
._download
_webpage
( 
 123                 'http://www.kuwo.cn/album/%s/' % album_id
, song_id
, 
 124                 note
='Download album detail info', 
 125                 errnote
='Unable to get album detail info') 
 127             publish_time 
= self
._html
_search
_regex
( 
 128                 r
'发行时间:(\d{4}-\d{2}-\d{2})', album_info_page
, 
 129                 'publish time', fatal
=False) 
 131                 publish_time 
= publish_time
.replace('-', '') 
 136             'creator': singer_name
, 
 137             'upload_date': publish_time
, 
 138             'description': lrc_content
, 
 143 class KuwoAlbumIE(InfoExtractor
): 
 144     IE_NAME 
= 'kuwo:album' 
 145     IE_DESC 
= '酷我音乐 - 专辑' 
 146     _VALID_URL 
= r
'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/' 
 148         'url': 'http://www.kuwo.cn/album/502294/', 
 151             'title': 'Made\xa0Series\xa0《M》', 
 152             'description': 'md5:d463f0d8a0ff3c3ea3d6ed7452a9483f', 
 157     def _real_extract(self
, url
): 
 158         album_id 
= self
._match
_id
(url
) 
 160         webpage 
= self
._download
_webpage
( 
 161             url
, album_id
, note
='Download album info', 
 162             errnote
='Unable to get album info') 
 164         album_name 
= self
._html
_search
_regex
( 
 165             r
'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', webpage
, 
 167         album_intro 
= remove_start( 
 168             clean_html(get_element_by_id('intro', webpage
)), 
 169             '%s简介:' % album_name
) 
 172             self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 173                 r
'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"', 
 176         return self
.playlist_result(entries
, album_id
, album_name
, album_intro
) 
 179 class KuwoChartIE(InfoExtractor
): 
 180     IE_NAME 
= 'kuwo:chart' 
 181     IE_DESC 
= '酷我音乐 - 排行榜' 
 182     _VALID_URL 
= r
'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm' 
 184         'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm', 
 188         'playlist_mincount': 10, 
 191     def _real_extract(self
, url
): 
 192         chart_id 
= self
._match
_id
(url
) 
 193         webpage 
= self
._download
_webpage
( 
 194             url
, chart_id
, note
='Download chart info', 
 195             errnote
='Unable to get chart info') 
 198             self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 199                 r
'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage
) 
 201         return self
.playlist_result(entries
, chart_id
) 
 204 class KuwoSingerIE(InfoExtractor
): 
 205     IE_NAME 
= 'kuwo:singer' 
 206     IE_DESC 
= '酷我音乐 - 歌手' 
 207     _VALID_URL 
= r
'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' 
 209         'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 
 212             'title': 'Bruno\xa0Mars', 
 214         'playlist_mincount': 329, 
 216         'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm', 
 221         'playlist_mincount': 95, 
 222         'skip': 'Regularly stalls travis build',  # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540 
 227     def _real_extract(self
, url
): 
 228         singer_id 
= self
._match
_id
(url
) 
 229         webpage 
= self
._download
_webpage
( 
 230             url
, singer_id
, note
='Download singer info', 
 231             errnote
='Unable to get singer info') 
 233         singer_name 
= self
._html
_search
_regex
( 
 234             r
'<h1>([^<]+)</h1>', webpage
, 'singer name') 
 236         artist_id 
= self
._html
_search
_regex
( 
 237             r
'data-artistid="(\d+)"', webpage
, 'artist id') 
 239         page_count 
= int(self
._html
_search
_regex
( 
 240             r
'data-page="(\d+)"', webpage
, 'page count')) 
 242         def page_func(page_num
): 
 243             webpage 
= self
._download
_webpage
( 
 244                 'http://www.kuwo.cn/artist/contentMusicsAjax', 
 245                 singer_id
, note
='Download song list page #%d' % (page_num 
+ 1), 
 246                 errnote
='Unable to get song list page #%d' % (page_num 
+ 1), 
 247                 query
={'artistId': artist_id
, 'pn': page_num
, 'rn': self
.PAGE_SIZE
}) 
 250                 self
.url_result(song_url
, 'Kuwo') for song_url 
in re
.findall( 
 251                     r
'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', 
 255         entries 
= InAdvancePagedList(page_func
, page_count
, self
.PAGE_SIZE
) 
 257         return self
.playlist_result(entries
, singer_id
, singer_name
) 
 260 class KuwoCategoryIE(InfoExtractor
): 
 261     IE_NAME 
= 'kuwo:category' 
 262     IE_DESC 
= '酷我音乐 - 分类' 
 263     _VALID_URL 
= r
'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm' 
 265         'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm', 
 269             'description': '这些都是属于八十年代的回忆!', 
 271         'playlist_mincount': 24, 
 274     def _real_extract(self
, url
): 
 275         category_id 
= self
._match
_id
(url
) 
 276         webpage 
= self
._download
_webpage
( 
 277             url
, category_id
, note
='Download category info', 
 278             errnote
='Unable to get category info') 
 280         category_name 
= self
._html
_search
_regex
( 
 281             r
'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage
, 'category name') 
 283         category_desc 
= remove_start( 
 284             get_element_by_id('intro', webpage
).strip(), 
 285             '%s简介:' % category_name
) 
 286         if category_desc 
== '暂无': 
 289         jsonm 
= self
._parse
_json
(self
._html
_search
_regex
( 
 290             r
'var\s+jsonm\s*=\s*([^;]+);', webpage
, 'category songs'), category_id
) 
 293             self
.url_result('http://www.kuwo.cn/yinyue/%s/' % song
['musicrid'], 'Kuwo') 
 294             for song 
in jsonm
['musiclist'] 
 296         return self
.playlist_result(entries
, category_id
, category_name
, category_desc
) 
 299 class KuwoMvIE(KuwoBaseIE
): 
 301     IE_DESC 
= '酷我音乐 - MV' 
 302     _VALID_URL 
= r
'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/' 
 304         'url': 'http://www.kuwo.cn/mv/6480076/', 
 308             'title': 'My HouseMV', 
 309             'creator': 'PM02:00', 
 311         # In this video, music URLs (anti.s) are blocked outside China and 
 312         # USA, while the MV URL (mvurl) is available globally, so force the MV 
 313         # URL for consistent results in different countries 
 318     _FORMATS 
= KuwoBaseIE
._FORMATS 
+ [ 
 319         {'format': 'mkv', 'ext': 'mkv', 'preference': 250}, 
 320         {'format': 'mp4', 'ext': 'mp4', 'preference': 200}, 
 323     def _real_extract(self
, url
): 
 324         song_id 
= self
._match
_id
(url
) 
 325         webpage 
= self
._download
_webpage
( 
 326             url
, song_id
, note
='Download mv detail info: %s' % song_id
, 
 327             errnote
='Unable to get mv detail info: %s' % song_id
) 
 330             r
'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"', 
 333             song_name 
= mobj
.group('song') 
 334             singer_name 
= mobj
.group('singer') 
 336             raise ExtractorError('Unable to find song or singer names') 
 338         formats 
= self
._get
_formats
(song_id
, tolerate_ip_deny
=True) 
 340         mv_url 
= self
._download
_webpage
( 
 341             'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id
, 
 342             song_id
, note
='Download %s MV URL' % song_id
) 
 348         self
._sort
_formats
(formats
) 
 353             'creator': singer_name
,