2 from __future__ 
import unicode_literals
 
  11 from ..compat 
import ( 
  24 class SoundcloudIE(InfoExtractor
): 
  25     """Information extractor for soundcloud.com 
  26        To access the media, the uid of the song and a stream token 
  27        must be extracted from the page source and the script must make 
  28        a request to media.soundcloud.com/crossdomain.xml. Then 
  29        the media can be grabbed by requesting a URL composed 
  30        of the stream token and uid 
  33     _VALID_URL 
= r
'''(?x)^(?:https?://)? 
  34                     (?:(?:(?:www\.|m\.)?soundcloud\.com/ 
  35                             (?P<uploader>[\w\d-]+)/ 
  36                             (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#])) 
  38                             (?P<token>[^?]+?)?(?:[?].*)?$) 
  39                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) 
  40                           (?:/?\?secret_token=(?P<secret_token>[^&]+))?) 
  41                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) 
  44     IE_NAME 
= 'soundcloud' 
  47             'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', 
  48             'md5': 'ebef0a451b909710ed1d7787dddbf0d7', 
  52                 'upload_date': '20121011', 
  53                 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 
  54                 'uploader': 'E.T. ExTerrestrial Music', 
  55                 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 
  61             'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 
  65                 'title': 'Goldrushed', 
  66                 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 
  67                 'uploader': 'The Royal Concept', 
  68                 'upload_date': '20120521', 
  73                 'skip_download': True, 
  78             'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', 
  79             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', 
  83                 'title': 'Youtube - Dl Test Video \'\' Ä↭', 
  84                 'uploader': 'jaimeMF', 
  85                 'description': 'test chars:  \"\'/\\ä↭', 
  86                 'upload_date': '20131209', 
  90         # private link (alt format) 
  92             'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp', 
  93             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', 
  97                 'title': 'Youtube - Dl Test Video \'\' Ä↭', 
  98                 'uploader': 'jaimeMF', 
  99                 'description': 'test chars:  \"\'/\\ä↭', 
 100                 'upload_date': '20131209', 
 106             'url': 'https://soundcloud.com/oddsamples/bus-brakes', 
 107             'md5': '7624f2351f8a3b2e7cd51522496e7631', 
 111                 'title': 'Bus Brakes', 
 112                 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 
 113                 'uploader': 'oddsamples', 
 114                 'upload_date': '20140109', 
 120     _CLIENT_ID 
= '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' 
 121     _IPHONE_CLIENT_ID 
= '376f225bf427445fc4bfb6b99b72e0bf' 
 123     def report_resolve(self
, video_id
): 
 124         """Report information extraction.""" 
 125         self
.to_screen('%s: Resolving id' % video_id
) 
 128     def _resolv_url(cls
, url
): 
 129         return 'http://api.soundcloud.com/resolve.json?url=' + url 
+ '&client_id=' + cls
._CLIENT
_ID
 
 131     def _extract_info_dict(self
, info
, full_title
=None, quiet
=False, secret_token
=None): 
 132         track_id 
= compat_str(info
['id']) 
 133         name 
= full_title 
or track_id
 
 135             self
.report_extraction(name
) 
 137         thumbnail 
= info
['artwork_url'] 
 138         if thumbnail 
is not None: 
 139             thumbnail 
= thumbnail
.replace('-large', '-t500x500') 
 143             'uploader': info
['user']['username'], 
 144             'upload_date': unified_strdate(info
['created_at']), 
 145             'title': info
['title'], 
 146             'description': info
['description'], 
 147             'thumbnail': thumbnail
, 
 148             'duration': int_or_none(info
.get('duration'), 1000), 
 149             'webpage_url': info
.get('permalink_url'), 
 152         if info
.get('downloadable', False): 
 153             # We can build a direct link to the song 
 155                 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format( 
 156                     track_id
, self
._CLIENT
_ID
)) 
 158                 'format_id': 'download', 
 159                 'ext': info
.get('original_format', 'mp3'), 
 165         # We have to retrieve the url 
 166         streams_url 
= ('http://api.soundcloud.com/i1/tracks/{0}/streams?' 
 167                        'client_id={1}&secret_token={2}'.format(track_id
, self
._IPHONE
_CLIENT
_ID
, secret_token
)) 
 168         format_dict 
= self
._download
_json
( 
 170             track_id
, 'Downloading track url') 
 172         for key
, stream_url 
in format_dict
.items(): 
 173             if key
.startswith('http'): 
 180             elif key
.startswith('rtmp'): 
 181                 # The url doesn't have an rtmp app, we have to extract the playpath 
 182                 url
, path 
= stream_url
.split('mp3:', 1) 
 186                     'play_path': 'mp3:' + path
, 
 192                 # We fall back to the stream_url in the original info; this 
 193                 # cannot always be used, as it sometimes gives an HTTP 404 error 
 195                     'format_id': 'fallback', 
 196                     'url': info
['stream_url'] + '?client_id=' + self
._CLIENT
_ID
, 
 202                 if f
['format_id'].startswith('http'): 
 203                     f
['protocol'] = 'http' 
 204                 if f
['format_id'].startswith('rtmp'): 
 205                     f
['protocol'] = 'rtmp' 
 207         self
._check
_formats
(formats
, track_id
) 
 208         self
._sort
_formats
(formats
) 
 209         result
['formats'] = formats
 
 213     def _real_extract(self
, url
): 
 214         mobj 
= re
.match(self
._VALID
_URL
, url
, flags
=re
.VERBOSE
) 
 216             raise ExtractorError('Invalid URL: %s' % url
) 
 218         track_id 
= mobj
.group('track_id') 
 220         if track_id 
is not None: 
 221             info_json_url 
= 'http://api.soundcloud.com/tracks/' + track_id 
+ '.json?client_id=' + self
._CLIENT
_ID
 
 222             full_title 
= track_id
 
 223             token 
= mobj
.group('secret_token') 
 225                 info_json_url 
+= '&secret_token=' + token
 
 226         elif mobj
.group('player'): 
 227             query 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
) 
 228             real_url 
= query
['url'][0] 
 229             # If the token is in the query of the original url we have to 
 231             if 'secret_token' in query
: 
 232                 real_url 
+= '?secret_token=' + query
['secret_token'][0] 
 233             return self
.url_result(real_url
) 
 235             # extract uploader (which is in the url) 
 236             uploader 
= mobj
.group('uploader') 
 237             # extract simple title (uploader + slug of song title) 
 238             slug_title 
= mobj
.group('title') 
 239             token 
= mobj
.group('token') 
 240             full_title 
= resolve_title 
= '%s/%s' % (uploader
, slug_title
) 
 242                 resolve_title 
+= '/%s' % token
 
 244             self
.report_resolve(full_title
) 
 246             url 
= 'http://soundcloud.com/%s' % resolve_title
 
 247             info_json_url 
= self
._resolv
_url
(url
) 
 248         info 
= self
._download
_json
(info_json_url
, full_title
, 'Downloading info JSON') 
 250         return self
._extract
_info
_dict
(info
, full_title
, secret_token
=token
) 
 253 class SoundcloudSetIE(SoundcloudIE
): 
 254     _VALID_URL 
= r
'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' 
 255     IE_NAME 
= 'soundcloud:set' 
 257         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', 
 260             'title': 'The Royal Concept EP', 
 262         'playlist_mincount': 6, 
 265     def _real_extract(self
, url
): 
 266         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 268         # extract uploader (which is in the url) 
 269         uploader 
= mobj
.group('uploader') 
 270         # extract simple title (uploader + slug of song title) 
 271         slug_title 
= mobj
.group('slug_title') 
 272         full_title 
= '%s/sets/%s' % (uploader
, slug_title
) 
 273         url 
= 'http://soundcloud.com/%s/sets/%s' % (uploader
, slug_title
) 
 275         token 
= mobj
.group('token') 
 277             full_title 
+= '/' + token
 
 280         self
.report_resolve(full_title
) 
 282         resolv_url 
= self
._resolv
_url
(url
) 
 283         info 
= self
._download
_json
(resolv_url
, full_title
) 
 286             msgs 
= (compat_str(err
['error_message']) for err 
in info
['errors']) 
 287             raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs
)) 
 289         entries 
= [self
.url_result(track
['permalink_url'], 'Soundcloud') for track 
in info
['tracks']] 
 294             'id': '%s' % info
['id'], 
 295             'title': info
['title'], 
 299 class SoundcloudUserIE(SoundcloudIE
): 
 300     _VALID_URL 
= r
'''(?x) 
 302                             (?:(?:www|m)\.)?soundcloud\.com/ 
 305                                 (?P<rsrc>tracks|sets|reposts|likes|spotlight) 
 309     IE_NAME 
= 'soundcloud:user' 
 311         'url': 'https://soundcloud.com/the-akashic-chronicler', 
 314             'title': 'The Akashic Chronicler (All)', 
 316         'playlist_mincount': 111, 
 318         'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', 
 321             'title': 'The Akashic Chronicler (Tracks)', 
 323         'playlist_mincount': 50, 
 325         'url': 'https://soundcloud.com/the-akashic-chronicler/sets', 
 328             'title': 'The Akashic Chronicler (Playlists)', 
 330         'playlist_mincount': 3, 
 332         'url': 'https://soundcloud.com/the-akashic-chronicler/reposts', 
 335             'title': 'The Akashic Chronicler (Reposts)', 
 337         'playlist_mincount': 7, 
 339         'url': 'https://soundcloud.com/the-akashic-chronicler/likes', 
 342             'title': 'The Akashic Chronicler (Likes)', 
 344         'playlist_mincount': 321, 
 346         'url': 'https://soundcloud.com/grynpyret/spotlight', 
 349             'title': 'Grynpyret (Spotlight)', 
 351         'playlist_mincount': 1, 
 354     _API_BASE 
= 'https://api.soundcloud.com' 
 355     _API_V2_BASE 
= 'https://api-v2.soundcloud.com' 
 358         'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE
, 
 359         'tracks': '%s/users/%%s/tracks' % _API_BASE
, 
 360         'sets': '%s/users/%%s/playlists' % _API_V2_BASE
, 
 361         'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE
, 
 362         'likes': '%s/users/%%s/likes' % _API_V2_BASE
, 
 363         'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE
, 
 370         'reposts': 'Reposts', 
 372         'spotlight': 'Spotlight', 
 375     def _real_extract(self
, url
): 
 376         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 377         uploader 
= mobj
.group('user') 
 379         url 
= 'http://soundcloud.com/%s/' % uploader
 
 380         resolv_url 
= self
._resolv
_url
(url
) 
 381         user 
= self
._download
_json
( 
 382             resolv_url
, uploader
, 'Downloading user info') 
 384         resource 
= mobj
.group('rsrc') or 'all' 
 385         base_url 
= self
._BASE
_URL
_MAP
[resource
] % user
['id'] 
 389             'client_id': self
._CLIENT
_ID
, 
 390             'linked_partitioning': '1', 
 393         query 
= COMMON_QUERY
.copy() 
 396         next_href 
= base_url 
+ '?' + compat_urllib_parse
.urlencode(query
) 
 399         for i 
in itertools
.count(): 
 400             response 
= self
._download
_json
( 
 401                 next_href
, uploader
, 'Downloading track page %s' % (i 
+ 1)) 
 403             collection 
= response
['collection'] 
 407             def resolve_permalink_url(candidates
): 
 408                 for cand 
in candidates
: 
 409                     if isinstance(cand
, dict): 
 410                         permalink_url 
= cand
.get('permalink_url') 
 411                         if permalink_url 
and permalink_url
.startswith('http'): 
 415                 permalink_url 
= resolve_permalink_url((e
, e
.get('track'), e
.get('playlist'))) 
 417                     entries
.append(self
.url_result(permalink_url
)) 
 419             next_href 
= response
.get('next_href') 
 423             parsed_next_href 
= compat_urlparse
.urlparse(response
['next_href']) 
 424             qs 
= compat_urlparse
.parse_qs(parsed_next_href
.query
) 
 425             qs
.update(COMMON_QUERY
) 
 426             next_href 
= compat_urlparse
.urlunparse( 
 427                 parsed_next_href
._replace
(query
=compat_urllib_parse
.urlencode(qs
, True))) 
 431             'id': compat_str(user
['id']), 
 432             'title': '%s (%s)' % (user
['username'], self
._TITLE
_MAP
[resource
]), 
 437 class SoundcloudPlaylistIE(SoundcloudIE
): 
 438     _VALID_URL 
= r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' 
 439     IE_NAME 
= 'soundcloud:playlist' 
 441         'url': 'http://api.soundcloud.com/playlists/4110309', 
 444             'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', 
 445             'description': 're:.*?TILT Brass - Bowery Poetry Club', 
 450     def _real_extract(self
, url
): 
 451         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 452         playlist_id 
= mobj
.group('id') 
 453         base_url 
= '%s//api.soundcloud.com/playlists/%s.json?' % (self
.http_scheme(), playlist_id
) 
 456             'client_id': self
._CLIENT
_ID
, 
 458         token 
= mobj
.group('token') 
 461             data_dict
['secret_token'] = token
 
 463         data 
= compat_urllib_parse
.urlencode(data_dict
) 
 464         data 
= self
._download
_json
( 
 465             base_url 
+ data
, playlist_id
, 'Downloading playlist') 
 467         entries 
= [self
.url_result(track
['permalink_url'], 'Soundcloud') for track 
in data
['tracks']] 
 472             'title': data
.get('title'), 
 473             'description': data
.get('description'), 
 478 class SoundcloudSearchIE(SearchInfoExtractor
, SoundcloudIE
): 
 479     IE_NAME 
= 'soundcloud:search' 
 480     IE_DESC 
= 'Soundcloud search' 
 481     _MAX_RESULTS 
= float('inf') 
 483         'url': 'scsearch15:post-avant jazzcore', 
 485             'title': 'post-avant jazzcore', 
 487         'playlist_count': 15, 
 490     _SEARCH_KEY 
= 'scsearch' 
 491     _MAX_RESULTS_PER_PAGE 
= 200 
 492     _DEFAULT_RESULTS_PER_PAGE 
= 50 
 493     _API_V2_BASE 
= 'https://api-v2.soundcloud.com' 
 495     def _get_collection(self
, endpoint
, collection_id
, **query
): 
 497             query
.get('limit', self
._DEFAULT
_RESULTS
_PER
_PAGE
), 
 498             self
._MAX
_RESULTS
_PER
_PAGE
) 
 499         query
['limit'] = limit
 
 500         query
['client_id'] = self
._CLIENT
_ID
 
 501         query
['linked_partitioning'] = '1' 
 503         data 
= compat_urllib_parse
.urlencode(encode_dict(query
)) 
 504         next_url 
= '{0}{1}?{2}'.format(self
._API
_V
2_BASE
, endpoint
, data
) 
 506         collected_results 
= 0 
 508         for i 
in itertools
.count(1): 
 509             response 
= self
._download
_json
( 
 510                 next_url
, collection_id
, 'Downloading page {0}'.format(i
), 
 511                 'Unable to download API page') 
 513             collection 
= response
.get('collection', []) 
 517             collection 
= list(filter(bool, collection
)) 
 518             collected_results 
+= len(collection
) 
 520             for item 
in collection
: 
 521                 yield self
.url_result(item
['uri'], SoundcloudIE
.ie_key()) 
 523             if not collection 
or collected_results 
>= limit
: 
 526             next_url 
= response
.get('next_href') 
 530     def _get_n_results(self
, query
, n
): 
 531         tracks 
= self
._get
_collection
('/search/tracks', query
, limit
=n
, q
=query
) 
 532         return self
.playlist_result(tracks
, playlist_title
=query
)