- # We have to retrieve the url
- streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
- 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
- format_dict = self._download_json(
- streams_url,
- track_id, 'Downloading track url')
-
- for key, stream_url in format_dict.items():
- if key.startswith('http'):
- formats.append({
- 'format_id': key,
- 'ext': ext,
- 'url': stream_url,
- 'vcodec': 'none',
- })
- elif key.startswith('rtmp'):
- # The url doesn't have an rtmp app, we have to extract the playpath
- url, path = stream_url.split('mp3:', 1)
- formats.append({
- 'format_id': key,
- 'url': url,
- 'play_path': 'mp3:' + path,
- 'ext': ext,
- 'vcodec': 'none',
- })
-
- if not formats:
- # We fallback to the stream_url in the original info, this
- # cannot be always used, sometimes it can give an HTTP 404 error
- formats.append({
- 'format_id': 'fallback',
- 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
- 'ext': ext,
- 'vcodec': 'none',
- })
-
- for f in formats:
- if f['format_id'].startswith('http'):
- f['protocol'] = 'http'
- if f['format_id'].startswith('rtmp'):
- f['protocol'] = 'rtmp'
-
- self._sort_formats(formats)
- result['formats'] = formats
-
- return result
+ def invalid_url(url):
+ return not url or url in format_urls
+
+ def add_format(f, protocol, is_preview=False):
+ mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
+ if mobj:
+ for k, v in mobj.groupdict().items():
+ if not f.get(k):
+ f[k] = v
+ format_id_list = []
+ if protocol:
+ format_id_list.append(protocol)
+ for k in ('ext', 'abr'):
+ v = f.get(k)
+ if v:
+ format_id_list.append(v)
+ preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
+ if preview:
+ format_id_list.append('preview')
+ abr = f.get('abr')
+ if abr:
+ f['abr'] = int(abr)
+ f.update({
+ 'format_id': '_'.join(format_id_list),
+ 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
+ 'preference': -10 if preview else None,
+ })
+ formats.append(f)
+
+ # New API
+ transcodings = try_get(
+ info, lambda x: x['media']['transcodings'], list) or []
+ for t in transcodings:
+ if not isinstance(t, dict):
+ continue
+ format_url = url_or_none(t.get('url'))
+ if not format_url:
+ continue
+ stream = self._download_json(
+ format_url, track_id, query=query, fatal=False)
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('url'))
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
+ stream_format = t.get('format') or {}
+ protocol = stream_format.get('protocol')
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ ext = None
+ preset = str_or_none(t.get('preset'))
+ if preset:
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(stream_format.get('mime_type'))
+ add_format({
+ 'url': stream_url,
+ 'ext': ext,
+ }, 'http' if protocol == 'progressive' else protocol,
+ t.get('snipped') or '/preview/' in format_url)
+
+ if not formats:
+ # Old API, does not work for some tracks (e.g.
+ # https://soundcloud.com/giovannisarani/mezzo-valzer)
+ # and might serve preview URLs (e.g.
+ # http://www.soundcloud.com/snbrn/ele)
+ format_dict = self._download_json(
+ track_base_url + '/streams', track_id,
+ 'Downloading track url', query=query, fatal=False) or {}
+
+ for key, stream_url in format_dict.items():
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
+ mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key)
+ if mobj:
+ protocol, ext, abr = mobj.groups()
+ add_format({
+ 'abr': abr,
+ 'ext': ext,
+ 'url': stream_url,
+ }, protocol)
+
+ if not formats:
+ # We fallback to the stream_url in the original info, this
+ # cannot be always used, sometimes it can give an HTTP 404 error
+ urlh = self._request_webpage(
+ HEADRequest(info.get('stream_url') or track_base_url + '/stream'),
+ track_id, query=query, fatal=False)
+ if urlh:
+ stream_url = urlh.geturl()
+ if not invalid_url(stream_url):
+ add_format({'url': stream_url}, 'http')
+
+ for f in formats:
+ f['vcodec'] = 'none'
+
+ self._sort_formats(formats)
+
+ user = info.get('user') or {}
+
+ thumbnails = []
+ artwork_url = info.get('artwork_url')
+ thumbnail = artwork_url or user.get('avatar_url')
+ if isinstance(thumbnail, compat_str):
+ if re.search(self._IMAGE_REPL_RE, thumbnail):
+ for image_id, size in self._ARTWORK_MAP.items():
+ i = {
+ 'id': image_id,
+ 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
+ }
+ if image_id == 'tiny' and not artwork_url:
+ size = 18
+ elif image_id == 'original':
+ i['preference'] = 10
+ if size:
+ i.update({
+ 'width': size,
+ 'height': size,
+ })
+ thumbnails.append(i)
+ else:
+ thumbnails = [{'url': thumbnail}]
+
+ def extract_count(key):
+ return int_or_none(info.get('%s_count' % key))
+
+ return {
+ 'id': track_id,
+ 'uploader': user.get('username'),
+ 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
+ 'uploader_url': user.get('permalink_url'),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
+ 'view_count': extract_count('playback'),
+ 'like_count': extract_count('favoritings') or extract_count('likes'),
+ 'comment_count': extract_count('comment'),
+ 'repost_count': extract_count('reposts'),
+ 'genre': info.get('genre'),
+ 'formats': formats
+ }