+ msgs = (compat_str(err['error_message']) for err in info['errors'])
+ raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
+
+ entries = self._extract_track_entries(info['tracks'])
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': '%s' % info['id'],
+ 'title': info['title'],
+ }
+
+
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
    """Base class for extractors that walk a paged SoundCloud API v2 listing."""

    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _extract_playlist(self, base_url, playlist_id, playlist_title):
        """Download every page of ``base_url`` and return a playlist dict.

        ``base_url`` is requested with ``limit``, ``client_id``,
        ``linked_partitioning`` and ``offset`` query parameters; subsequent
        pages are taken from the ``next_href`` field of each response.
        ``playlist_id`` and ``playlist_title`` are passed through to the
        returned dict (``playlist_id`` is also used as the download label).
        """
        COMMON_QUERY = {
            'limit': 50,
            'client_id': self._CLIENT_ID,
            'linked_partitioning': '1',
        }

        def resolve_entry(candidates):
            # Return a url_result for the first candidate that is a dict
            # carrying a usable permalink_url; None when nothing qualifies.
            for cand in candidates:
                if not isinstance(cand, dict):
                    continue
                permalink_url = url_or_none(cand.get('permalink_url'))
                if not permalink_url:
                    continue
                return self.url_result(
                    permalink_url,
                    ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
                    video_id=self._extract_id(cand),
                    video_title=cand.get('title'))
            return None

        query = COMMON_QUERY.copy()
        query['offset'] = 0

        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)

        entries = []
        for i in itertools.count():
            response = self._download_json(
                next_href, playlist_id, 'Downloading track page %s' % (i + 1))

            # A missing or malformed collection is treated as an empty page.
            collection = response.get('collection')
            if not isinstance(collection, list):
                collection = []

            # Empty collection may be returned, in this case we proceed
            # straight to next_href

            for e in collection:
                # Skip anything that is not a dict up front: calling
                # e.get(...) on it would raise before resolve_entry could
                # filter it out.
                if not isinstance(e, dict):
                    continue
                entry = resolve_entry((e, e.get('track'), e.get('playlist')))
                if entry:
                    entries.append(entry)

            next_href = response.get('next_href')
            if not next_href:
                break

            # Re-apply the common parameters on top of whatever query string
            # next_href already carries. parse_qs yields lists, hence the
            # doseq=True flag when re-encoding.
            parsed_next_href = compat_urlparse.urlparse(next_href)
            qs = compat_urlparse.parse_qs(parsed_next_href.query)
            qs.update(COMMON_QUERY)
            next_href = compat_urlparse.urlunparse(
                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
+
+
class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for a SoundCloud user page and its sub-listings.

    Handles the bare profile URL (mapped to the 'all' resource) as well as
    the tracks, albums, sets, reposts, likes and spotlight sections.
    """

    _VALID_URL = r'''(?x)
                        https?://
                            (?:(?:www|m)\.)?soundcloud\.com/
                            (?P<user>[^/]+)
                            (?:/
                                (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
                            )?
                            /?(?:[?#].*)?$
                    '''
    IE_NAME = 'soundcloud:user'
    _TESTS = [{
        'url': 'https://soundcloud.com/soft-cell-official',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (All)',
        },
        'playlist_mincount': 28,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/tracks',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Tracks)',
        },
        'playlist_mincount': 27,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/albums',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Albums)',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://soundcloud.com/jcv246/sets',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Playlists)',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://soundcloud.com/jcv246/reposts',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Reposts)',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://soundcloud.com/clalberg/likes',
        'info_dict': {
            'id': '11817582',
            'title': 'clalberg (Likes)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/grynpyret/spotlight',
        'info_dict': {
            'id': '7098329',
            'title': 'Grynpyret (Spotlight)',
        },
        'playlist_mincount': 1,
    }]

    # Resource name -> API v2 URL template; the remaining %s takes the
    # numeric user id at extraction time.
    _BASE_URL_MAP = {
        'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
    }

    # Resource name -> label appended (in parentheses) to the playlist title.
    _TITLE_MAP = {
        'all': 'All',
        'tracks': 'Tracks',
        'albums': 'Albums',
        'sets': 'Playlists',
        'reposts': 'Reposts',
        'likes': 'Likes',
        'spotlight': 'Spotlight',
    }

    def _real_extract(self, url):
        """Resolve the user behind ``url`` and return the requested listing."""
        match = re.match(self._VALID_URL, url)
        user_slug = match.group('user')

        # A URL without a section suffix selects the combined listing.
        section = match.group('rsrc')
        if not section:
            section = 'all'

        # The user record (id, username) is looked up through the canonical
        # profile URL, regardless of which section was requested.
        profile_url = 'https://soundcloud.com/%s/' % user_slug
        user = self._download_json(
            self._resolv_url(profile_url), user_slug, 'Downloading user info')

        user_id = user['id']
        listing_title = '%s (%s)' % (user['username'], self._TITLE_MAP[section])

        return self._extract_playlist(
            self._BASE_URL_MAP[section] % user_id,
            compat_str(user_id),
            listing_title)
+
+
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for SoundCloud track station pages."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
    IE_NAME = 'soundcloud:trackstation'
    _TESTS = [{
        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
        'info_dict': {
            'id': '286017854',
            'title': 'Track station: your-text',
        },
        'playlist_mincount': 47,
    }]

    def _real_extract(self, url):
        """Find the station's numeric id in the web page and list its tracks."""
        station_slug = self._match_id(url)
        page = self._download_webpage(url, station_slug)

        # The numeric id is not part of the URL; it is scraped from the page.
        station_id = self._search_regex(
            r'soundcloud:track-stations:(\d+)', page, 'track id')

        tracks_url = '%s/stations/soundcloud:track-stations:%s/tracks' % (
            self._API_V2_BASE, station_id)

        return self._extract_playlist(
            tracks_url, station_id, 'Track station: %s' % station_slug)
+
+
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
    """Extractor for api.soundcloud.com playlist URLs (optionally with a secret token)."""

    _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    IE_NAME = 'soundcloud:playlist'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/playlists/4110309',
        'info_dict': {
            'id': '4110309',
            'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
            'description': 're:.*?TILT Brass - Bowery Poetry Club',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Fetch the playlist JSON for ``url`` and return a playlist dict."""
        match = re.match(self._VALID_URL, url)
        playlist_id = match.group('id')
        api_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)

        api_query = {'client_id': self._CLIENT_ID}
        # Forward the secret token only when the URL actually carried one.
        secret_token = match.group('token')
        if secret_token:
            api_query['secret_token'] = secret_token

        playlist = self._download_json(
            api_url + compat_urllib_parse_urlencode(api_query),
            playlist_id, 'Downloading playlist')

        track_entries = self._extract_track_entries(playlist['tracks'])

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist.get('title'),
            'description': playlist.get('description'),
            'entries': track_entries,
        }
+
+
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    """Search extractor for SoundCloud tracks (``scsearch`` prefix)."""

    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    _SEARCH_KEY = 'scsearch'
    # Upper bound for the per-request ``limit`` query parameter.
    _MAX_RESULTS_PER_PAGE = 200
    # Number of results collected when the caller passes no ``limit``.
    _DEFAULT_RESULTS_PER_PAGE = 50
    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _get_collection(self, endpoint, collection_id, **query):
        """Yield url_result entries from a paged API v2 collection.

        ``endpoint`` is appended to ``_API_V2_BASE``; ``collection_id`` is the
        label used for the download messages. ``query`` holds extra query
        parameters; its optional ``limit`` is the total number of results
        wanted (``float('inf')`` for no bound) and defaults to
        ``_DEFAULT_RESULTS_PER_PAGE``.
        """
        # Keep the overall target separate from the per-request page size:
        # the page size is capped at _MAX_RESULTS_PER_PAGE, while the target
        # may span several pages.
        wanted = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)
        query['limit'] = min(wanted, self._MAX_RESULTS_PER_PAGE)
        query['client_id'] = self._CLIENT_ID
        query['linked_partitioning'] = '1'
        query['offset'] = 0
        data = compat_urllib_parse_urlencode(query)
        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)

        collected_results = 0

        for i in itertools.count(1):
            response = self._download_json(
                next_url, collection_id, 'Downloading page {0}'.format(i),
                'Unable to download API page')

            collection = response.get('collection', [])
            if not collection:
                break

            # Drop empty/null items before turning them into results.
            collection = list(filter(bool, collection))

            for item in collection:
                # Never hand out more than requested, even when the final
                # page overshoots the target.
                if collected_results >= wanted:
                    return
                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
                collected_results += 1

            if not collection or collected_results >= wanted:
                break

            next_url = response.get('next_href')
            if not next_url:
                break

    def _get_n_results(self, query, n):
        """Return a playlist of up to ``n`` track results for ``query``."""
        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
        return self.playlist_result(tracks, playlist_title=query)