Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yandexmusic.py
Merge tag 'upstream/2015.07.21'
[youtubedl] / youtube_dl / extractor / yandexmusic.py
1 # coding=utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import hashlib
6
7 from .common import InfoExtractor
8 from ..compat import compat_str
9 from ..utils import (
10 int_or_none,
11 float_or_none,
12 )
13
14
class YandexMusicBaseIE(InfoExtractor):
    """Helpers shared by the Yandex.Music track, album and playlist extractors."""

    def _get_track_url(self, storage_dir, track_id):
        """Build the direct MP3 download URL for a track.

        Downloads the ``download-info`` JSON for ``storage_dir`` and signs the
        returned path with an MD5 key.

        :param storage_dir: the track's ``storageDir`` value; it is split on
            ``'.'`` and the second component is sent as the ``track-id``
            query parameter, so it must contain at least one dot.
        :param track_id: passed to ``_download_json`` as the item id for the
            request (presumably used for progress/error messages only).
        :return: the MP3 URL as a string.
        """
        data = self._download_json(
            'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
            % storage_dir,
            track_id, 'Downloading track location JSON')

        # The key is the MD5 hex digest of a fixed salt, the path without its
        # first character, and the 's' field of the response.
        signed = 'XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']
        key = hashlib.md5(signed.encode('utf-8')).hexdigest()
        storage = storage_dir.split('.')

        return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
                % (data['host'], key, data['ts'] + data['path'], storage[1]))

    def _get_track_info(self, track):
        """Convert a track JSON object into an info dict.

        :param track: dict with at least ``id``, ``storageDir`` and ``title``;
            ``artists``, ``fileSize`` and ``durationMs`` are optional.
        :return: dict with ``id``, ``ext``, ``url``, ``title``, ``filesize``
            and ``duration`` keys.
        """
        title = track['title']
        # Some tracks may carry no artist information; fall back to the bare
        # track title instead of failing on artists[0].
        artists = track.get('artists')
        if artists:
            title = '%s - %s' % (artists[0]['name'], title)

        return {
            'id': track['id'],
            'ext': 'mp3',
            'url': self._get_track_url(track['storageDir'], track['id']),
            'title': title,
            'filesize': int_or_none(track.get('fileSize')),
            # durationMs is passed with a scale of 1000 (presumably ms -> s).
            'duration': float_or_none(track.get('durationMs'), 1000),
        }
37
38
class YandexMusicTrackIE(YandexMusicBaseIE):
    """Extractor for a single track URL (``/album/<album_id>/track/<id>``)."""

    IE_NAME = 'yandexmusic:track'
    IE_DESC = 'Яндекс.Музыка - Трек'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

    _TEST = {
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
            'filesize': 4628061,
            'duration': 193.04,
        }
    }

    def _real_extract(self, url):
        """Fetch the track JSON for ``url`` and return its info dict."""
        match = re.match(self._VALID_URL, url)
        album_id = match.group('album_id')
        track_id = match.group('id')

        # The handler expects the pair as "<track_id>:<album_id>".
        track_json_url = (
            'http://music.yandex.ru/handlers/track.jsx?track=%s:%s'
            % (track_id, album_id))
        response = self._download_json(
            track_json_url, track_id, 'Downloading track JSON')

        return self._get_track_info(response['track'])
65
66
class YandexMusicAlbumIE(YandexMusicBaseIE):
    """Extractor for an album URL (``/album/<id>``), returned as a playlist."""

    IE_NAME = 'yandexmusic:album'
    IE_DESC = 'Яндекс.Музыка - Альбом'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'

    _TEST = {
        'url': 'http://music.yandex.ru/album/540508',
        'info_dict': {
            'id': '540508',
            'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
        },
        'playlist_count': 50,
    }

    def _real_extract(self, url):
        """Fetch the album JSON for ``url`` and return a playlist of its tracks.

        The playlist title is ``"<first artist> - <album title>"`` with
        ``" (<year>)"`` appended when the album JSON has a truthy ``year``.
        """
        album_id = self._match_id(url)

        album = self._download_json(
            'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
            album_id, 'Downloading album JSON')

        # 'volumes' is a list of track lists; include the tracks of every
        # volume, not only the first one, so multi-volume albums are complete.
        entries = [
            self._get_track_info(track)
            for volume in album['volumes']
            for track in volume]

        title = '%s - %s' % (album['artists'][0]['name'], album['title'])
        year = album.get('year')
        if year:
            title += ' (%s)' % year

        return self.playlist_result(entries, compat_str(album['id']), title)
96
97
class YandexMusicPlaylistIE(YandexMusicBaseIE):
    """Extractor for a user playlist URL (``/users/<user>/playlists/<id>``)."""

    IE_NAME = 'yandexmusic:playlist'
    IE_DESC = 'Яндекс.Музыка - Плейлист'
    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'

    _TEST = {
        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
        'info_dict': {
            'id': '1245',
            'title': 'Что слушают Enter Shikari',
            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
        },
        'playlist_count': 6,
    }

    def _real_extract(self, url):
        """Scrape the playlist page at ``url`` and return a playlist result.

        The playlist data is read from the ``var Mu = {...};`` JavaScript
        object embedded in the page, under ``pageData.playlist``.
        """
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        # Pull the embedded JSON object out of the page's inline script.
        mu_json = self._search_regex(
            r'var\s+Mu\s*=\s*({.+?});\s*</script>', page, 'player')
        page_data = self._parse_json(mu_json, playlist_id)['pageData']
        playlist = page_data['playlist']

        entries = []
        for track in playlist['tracks']:
            entries.append(self._get_track_info(track))

        return self.playlist_result(
            entries, compat_str(playlist_id),
            playlist['title'], playlist.get('description'))