-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P<uploader>[\w\d-]+)/
- (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
'uploader': 'E.T. ExTerrestrial Music',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'duration': 143,
+ 'license': 'all-rights-reserved',
}
},
# not streamable song
'uploader': 'The Royal Concept',
'upload_date': '20120521',
'duration': 227,
+ 'license': 'all-rights-reserved',
},
'params': {
# rtmp
'description': 'test chars: \"\'/\\ä↭',
'upload_date': '20131209',
'duration': 9,
+ 'license': 'all-rights-reserved',
},
},
# private link (alt format)
'description': 'test chars: \"\'/\\ä↭',
'upload_date': '20131209',
'duration': 9,
+ 'license': 'all-rights-reserved',
},
},
# downloadable song
'uploader': 'oddsamples',
'upload_date': '20140109',
'duration': 17,
+ 'license': 'cc-by-sa',
},
},
]
- _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
+ _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
+ @staticmethod
+ def _extract_urls(webpage):  # list the SoundCloud player URLs found in iframe src attributes of the page markup
+ return [m.group('url') for m in re.finditer(  # one item per regex match, in the order re.finditer yields them
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',  # \1 makes the closing quote match the opening one
+ webpage)]  # an empty list results when nothing matches
+
def report_resolve(self, video_id):  # emits "VIDEO_ID: Resolving id" through self.to_screen
"""Report information extraction."""
self.to_screen('%s: Resolving id' % video_id)  # video_id is interpolated with %s, so any printable value works
name = full_title or track_id
if quiet:
self.report_extraction(name)
-
- thumbnail = info['artwork_url']
- if thumbnail is not None:
+ thumbnail = info.get('artwork_url')
+ if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500')
ext = 'mp3'
result = {
'id': track_id,
- 'uploader': info['user']['username'],
- 'upload_date': unified_strdate(info['created_at']),
+ 'uploader': info.get('user', {}).get('username'),
+ 'upload_date': unified_strdate(info.get('created_at')),
'title': info['title'],
- 'description': info['description'],
+ 'description': info.get('description'),
'thumbnail': thumbnail,
'duration': int_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
}
formats = []
if info.get('downloadable', False):
})
# We have to retrieve the url
- streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
- 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
format_dict = self._download_json(
- streams_url,
- track_id, 'Downloading track url')
+ 'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
+ track_id, 'Downloading track url', query={
+ 'client_id': self._CLIENT_ID,
+ 'secret_token': secret_token,
+ })
for key, stream_url in format_dict.items():
+ abr = int_or_none(self._search_regex(
+ r'_(\d+)_url', key, 'audio bitrate', default=None))
if key.startswith('http'):
- formats.append({
+ stream_formats = [{
'format_id': key,
'ext': ext,
'url': stream_url,
- 'vcodec': 'none',
- })
+ }]
elif key.startswith('rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
- formats.append({
+ stream_formats = [{
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
'ext': 'flv',
- 'vcodec': 'none',
- })
-
- if not formats:
- # We fallback to the stream_url in the original info, this
- # cannot be always used, sometimes it can give an HTTP 404 error
- formats.append({
- 'format_id': 'fallback',
- 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
- 'ext': ext,
- 'vcodec': 'none',
- })
+ }]
+ elif key.startswith('hls'):
+ stream_formats = self._extract_m3u8_formats(
+ stream_url, track_id, 'mp3', entry_protocol='m3u8_native',
+ m3u8_id=key, fatal=False)
+ else:
+ continue
+
+ for f in stream_formats:
+ f['abr'] = abr
+
+ formats.extend(stream_formats)
+
+ if not formats:
+ # Fall back to the stream_url from the original info; this does not
+ # always work and can sometimes return an HTTP 404 error
+ formats.append({
+ 'format_id': 'fallback',
+ 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+ 'ext': ext,
+ })
- for f in formats:
- if f['format_id'].startswith('http'):
- f['protocol'] = 'http'
- if f['format_id'].startswith('rtmp'):
- f['protocol'] = 'rtmp'
+ for f in formats:
+ f['vcodec'] = 'none'
self._check_formats(formats, track_id)
self._sort_formats(formats)
raise ExtractorError('Invalid URL: %s' % url)
track_id = mobj.group('track_id')
- token = None
+
if track_id is not None:
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id
return self._extract_info_dict(info, full_title, secret_token=token)
-class SoundcloudSetIE(SoundcloudIE):
+class SoundcloudPlaylistBaseIE(SoundcloudIE):  # helper base class; the set, user and playlist extractors in this file subclass it
+ @staticmethod
+ def _extract_id(e):  # returns e['id'] converted with compat_str, or None when e has no truthy 'id'
+ return compat_str(e['id']) if e.get('id') else None
+
+ def _extract_track_entries(self, tracks):  # builds one url_result per track dict, tagged with SoundcloudIE.ie_key()
+ return [
+ self.url_result(
+ track['permalink_url'], SoundcloudIE.ie_key(),
+ video_id=self._extract_id(track))  # the id is passed along so the entry carries it as video_id
+ for track in tracks if track.get('permalink_url')]  # tracks lacking a permalink_url are left out
+
+
+class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
IE_NAME = 'soundcloud:set'
_TESTS = [{
'title': 'The Royal Concept EP',
},
'playlist_mincount': 6,
+ }, {
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+ 'only_matching': True,
}]
def _real_extract(self, url):
msgs = (compat_str(err['error_message']) for err in info['errors'])
raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
- entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']]
+ entries = self._extract_track_entries(info['tracks'])
return {
'_type': 'playlist',
}
-class SoundcloudUserIE(SoundcloudIE):
+class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:(?:www|m)\.)?soundcloud\.com/
'id': '114582580',
'title': 'The Akashic Chronicler (All)',
},
- 'playlist_mincount': 111,
+ 'playlist_mincount': 74,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Tracks)',
},
- 'playlist_mincount': 50,
+ 'playlist_mincount': 37,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
'info_dict': {
'id': '114582580',
'title': 'The Akashic Chronicler (Playlists)',
},
- 'playlist_mincount': 3,
+ 'playlist_mincount': 2,
}, {
'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
'info_dict': {
'url': 'https://soundcloud.com/grynpyret/spotlight',
'info_dict': {
'id': '7098329',
- 'title': 'Grynpyret (Spotlight)',
+ 'title': 'GRYNPYRET (Spotlight)',
},
'playlist_mincount': 1,
}]
for cand in candidates:
if isinstance(cand, dict):
permalink_url = cand.get('permalink_url')
+ entry_id = self._extract_id(cand)
if permalink_url and permalink_url.startswith('http'):
- return permalink_url
+ return permalink_url, entry_id
for e in collection:
- permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+ permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
if permalink_url:
- entries.append(self.url_result(permalink_url))
+ entries.append(self.url_result(permalink_url, video_id=entry_id))
next_href = response.get('next_href')
if not next_href:
}
-class SoundcloudPlaylistIE(SoundcloudIE):
+class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
IE_NAME = 'soundcloud:playlist'
_TESTS = [{
data = self._download_json(
base_url + data, playlist_id, 'Downloading playlist')
- entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']]
+ entries = self._extract_track_entries(data['tracks'])
return {
'_type': 'playlist',