2 from __future__ 
import unicode_literals
 
  11 from ..compat 
import ( 
  14     compat_urllib_parse_urlencode
, 
  24 class SoundcloudIE(InfoExtractor
): 
  25     """Information extractor for soundcloud.com 
  26        To access the media, the uid of the song and a stream token 
  27        must be extracted from the page source and the script must make 
  28        a request to media.soundcloud.com/crossdomain.xml. Then 
  29        the media can be grabbed by requesting from an url composed 
  30        of the stream token and uid 
  33     _VALID_URL 
= r
'''(?x)^(?:https?://)? 
  34                     (?:(?:(?:www\.|m\.)?soundcloud\.com/ 
  36                             (?P<uploader>[\w\d-]+)/ 
  37                             (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) 
  39                             (?P<token>[^?]+?)?(?:[?].*)?$) 
  40                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) 
  41                           (?:/?\?secret_token=(?P<secret_token>[^&]+))?) 
  42                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) 
  45     IE_NAME 
= 'soundcloud' 
  48             'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', 
  49             'md5': 'ebef0a451b909710ed1d7787dddbf0d7', 
  53                 'upload_date': '20121011', 
  54                 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 
  55                 'uploader': 'E.T. ExTerrestrial Music', 
  56                 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 
  58                 'license': 'all-rights-reserved', 
  63             'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 
  67                 'title': 'Goldrushed', 
  68                 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 
  69                 'uploader': 'The Royal Concept', 
  70                 'upload_date': '20120521', 
  72                 'license': 'all-rights-reserved', 
  76                 'skip_download': True, 
  81             'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', 
  82             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', 
  86                 'title': 'Youtube - Dl Test Video \'\' Ä↭', 
  87                 'uploader': 'jaimeMF', 
  88                 'description': 'test chars:  \"\'/\\ä↭', 
  89                 'upload_date': '20131209', 
  91                 'license': 'all-rights-reserved', 
  94         # private link (alt format) 
  96             'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp', 
  97             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', 
 101                 'title': 'Youtube - Dl Test Video \'\' Ä↭', 
 102                 'uploader': 'jaimeMF', 
 103                 'description': 'test chars:  \"\'/\\ä↭', 
 104                 'upload_date': '20131209', 
 106                 'license': 'all-rights-reserved', 
 111             'url': 'https://soundcloud.com/oddsamples/bus-brakes', 
 112             'md5': '7624f2351f8a3b2e7cd51522496e7631', 
 116                 'title': 'Bus Brakes', 
 117                 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', 
 118                 'uploader': 'oddsamples', 
 119                 'upload_date': '20140109', 
 121                 'license': 'cc-by-sa', 
 124         # private link, downloadable format 
 126             'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', 
 127             'md5': '64a60b16e617d41d0bef032b7f55441e', 
 131                 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 
 132                 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 
 133                 'uploader': 'Ori Uplift Music', 
 134                 'upload_date': '20170831', 
 136                 'license': 'all-rights-reserved', 
 139         # no album art, use avatar pic for thumbnail 
 141             'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real', 
 142             'md5': '59c7872bc44e5d99b7211891664760c2', 
 146                 'title': 'Sideways (Prod. Mad Real)', 
 147                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 
 148                 'uploader': 'garyvee', 
 149                 'upload_date': '20170226', 
 151                 'thumbnail': r
're:https?://.*\.jpg', 
 152                 'license': 'all-rights-reserved', 
 155                 'skip_download': True, 
 160     _CLIENT_ID 
= 'DQskPX1pntALRzMp4HSxya3Mc0AO66Ro' 
 161     _IPHONE_CLIENT_ID 
= '376f225bf427445fc4bfb6b99b72e0bf' 
 164     def _extract_urls(webpage
): 
 165         return [m
.group('url') for m 
in re
.finditer( 
 166             r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
://)?
(?
:w\
.)?soundcloud\
.com
/player
.+?
)\
1', 
 169     def report_resolve(self, video_id): 
 170         """Report information extraction.""" 
 171         self.to_screen('%s: Resolving 
id' % video_id) 
 174     def _resolv_url(cls, url): 
 175         return 'https
://api
.soundcloud
.com
/resolve
.json?url
=' + url + '&client_id
=' + cls._CLIENT_ID 
 177     def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): 
 178         track_id = compat_str(info['id']) 
 179         name = full_title or track_id 
 181             self.report_extraction(name) 
 182         thumbnail = info.get('artwork_url
') or info.get('user
', {}).get('avatar_url
') 
 183         if isinstance(thumbnail, compat_str): 
 184             thumbnail = thumbnail.replace('-large
', '-t500x500
') 
 188             'uploader
': info.get('user
', {}).get('username
'), 
 189             'upload_date
': unified_strdate(info.get('created_at
')), 
 190             'title
': info['title
'], 
 191             'description
': info.get('description
'), 
 192             'thumbnail
': thumbnail, 
 193             'duration
': int_or_none(info.get('duration
'), 1000), 
 194             'webpage_url
': info.get('permalink_url
'), 
 195             'license
': info.get('license
'), 
 198         query = {'client_id
': self._CLIENT_ID} 
 199         if secret_token is not None: 
 200             query['secret_token
'] = secret_token 
 201         if info.get('downloadable
', False): 
 202             # We can build a direct link to the song 
 203             format_url = update_url_query( 
 204                 'https
://api
.soundcloud
.com
/tracks
/%s/download
' % track_id, query) 
 206                 'format_id
': 'download
', 
 207                 'ext
': info.get('original_format
', 'mp3
'), 
 213         # We have to retrieve the url 
 214         format_dict = self._download_json( 
 215             'https
://api
.soundcloud
.com
/i1
/tracks
/%s/streams
' % track_id, 
 216             track_id, 'Downloading track url
', query=query) 
 218         for key, stream_url in format_dict.items(): 
 219             abr = int_or_none(self._search_regex( 
 220                 r'_(\d
+)_url
', key, 'audio bitrate
', default=None)) 
 221             if key.startswith('http
'): 
 227             elif key.startswith('rtmp
'): 
 228                 # The url doesn't have an rtmp app
, we have to extract the playpath
 
 229                 url
, path 
= stream_url
.split('mp3:', 1) 
 233                     'play_path': 'mp3:' + path
, 
 236             elif key
.startswith('hls'): 
 237                 stream_formats 
= self
._extract
_m
3u8_formats
( 
 238                     stream_url
, track_id
, 'mp3', entry_protocol
='m3u8_native', 
 239                     m3u8_id
=key
, fatal
=False) 
 243             for f 
in stream_formats
: 
 246             formats
.extend(stream_formats
) 
 249             # We fallback to the stream_url in the original info, this 
 250             # cannot be always used, sometimes it can give an HTTP 404 error 
 252                 'format_id': 'fallback', 
 253                 'url': update_url_query(info
['stream_url'], query
), 
 260         self
._check
_formats
(formats
, track_id
) 
 261         self
._sort
_formats
(formats
) 
 262         result
['formats'] = formats
 
 266     def _real_extract(self
, url
): 
 267         mobj 
= re
.match(self
._VALID
_URL
, url
, flags
=re
.VERBOSE
) 
 269             raise ExtractorError('Invalid URL: %s' % url
) 
 271         track_id 
= mobj
.group('track_id') 
 273         if track_id 
is not None: 
 274             info_json_url 
= 'https://api.soundcloud.com/tracks/' + track_id 
+ '.json?client_id=' + self
._CLIENT
_ID
 
 275             full_title 
= track_id
 
 276             token 
= mobj
.group('secret_token') 
 278                 info_json_url 
+= '&secret_token=' + token
 
 279         elif mobj
.group('player'): 
 280             query 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(url
).query
) 
 281             real_url 
= query
['url'][0] 
 282             # If the token is in the query of the original url we have to 
 284             if 'secret_token' in query
: 
 285                 real_url 
+= '?secret_token=' + query
['secret_token'][0] 
 286             return self
.url_result(real_url
) 
 288             # extract uploader (which is in the url) 
 289             uploader 
= mobj
.group('uploader') 
 290             # extract simple title (uploader + slug of song title) 
 291             slug_title 
= mobj
.group('title') 
 292             token 
= mobj
.group('token') 
 293             full_title 
= resolve_title 
= '%s/%s' % (uploader
, slug_title
) 
 295                 resolve_title 
+= '/%s' % token
 
 297             self
.report_resolve(full_title
) 
 299             url 
= 'https://soundcloud.com/%s' % resolve_title
 
 300             info_json_url 
= self
._resolv
_url
(url
) 
 301         info 
= self
._download
_json
(info_json_url
, full_title
, 'Downloading info JSON') 
 303         return self
._extract
_info
_dict
(info
, full_title
, secret_token
=token
) 
 306 class SoundcloudPlaylistBaseIE(SoundcloudIE
): 
 309         return compat_str(e
['id']) if e
.get('id') else None 
 311     def _extract_track_entries(self
, tracks
): 
 314                 track
['permalink_url'], SoundcloudIE
.ie_key(), 
 315                 video_id
=self
._extract
_id
(track
)) 
 316             for track 
in tracks 
if track
.get('permalink_url')] 
 319 class SoundcloudSetIE(SoundcloudPlaylistBaseIE
): 
 320     _VALID_URL 
= r
'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' 
 321     IE_NAME 
= 'soundcloud:set' 
 323         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', 
 326             'title': 'The Royal Concept EP', 
 328         'playlist_mincount': 5, 
 330         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', 
 331         'only_matching': True, 
 334     def _real_extract(self
, url
): 
 335         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 337         # extract uploader (which is in the url) 
 338         uploader 
= mobj
.group('uploader') 
 339         # extract simple title (uploader + slug of song title) 
 340         slug_title 
= mobj
.group('slug_title') 
 341         full_title 
= '%s/sets/%s' % (uploader
, slug_title
) 
 342         url 
= 'https://soundcloud.com/%s/sets/%s' % (uploader
, slug_title
) 
 344         token 
= mobj
.group('token') 
 346             full_title 
+= '/' + token
 
 349         self
.report_resolve(full_title
) 
 351         resolv_url 
= self
._resolv
_url
(url
) 
 352         info 
= self
._download
_json
(resolv_url
, full_title
) 
 355             msgs 
= (compat_str(err
['error_message']) for err 
in info
['errors']) 
 356             raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs
)) 
 358         entries 
= self
._extract
_track
_entries
(info
['tracks']) 
 363             'id': '%s' % info
['id'], 
 364             'title': info
['title'], 
 368 class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE
): 
 369     _API_BASE 
= 'https://api.soundcloud.com' 
 370     _API_V2_BASE 
= 'https://api-v2.soundcloud.com' 
 372     def _extract_playlist(self
, base_url
, playlist_id
, playlist_title
): 
 375             'client_id': self
._CLIENT
_ID
, 
 376             'linked_partitioning': '1', 
 379         query 
= COMMON_QUERY
.copy() 
 382         next_href 
= base_url 
+ '?' + compat_urllib_parse_urlencode(query
) 
 385         for i 
in itertools
.count(): 
 386             response 
= self
._download
_json
( 
 387                 next_href
, playlist_id
, 'Downloading track page %s' % (i 
+ 1)) 
 389             collection 
= response
['collection'] 
 393             def resolve_permalink_url(candidates
): 
 394                 for cand 
in candidates
: 
 395                     if isinstance(cand
, dict): 
 396                         permalink_url 
= cand
.get('permalink_url') 
 397                         entry_id 
= self
._extract
_id
(cand
) 
 398                         if permalink_url 
and permalink_url
.startswith('http'): 
 399                             return permalink_url
, entry_id
 
 402                 permalink_url
, entry_id 
= resolve_permalink_url((e
, e
.get('track'), e
.get('playlist'))) 
 404                     entries
.append(self
.url_result(permalink_url
, video_id
=entry_id
)) 
 406             next_href 
= response
.get('next_href') 
 410             parsed_next_href 
= compat_urlparse
.urlparse(response
['next_href']) 
 411             qs 
= compat_urlparse
.parse_qs(parsed_next_href
.query
) 
 412             qs
.update(COMMON_QUERY
) 
 413             next_href 
= compat_urlparse
.urlunparse( 
 414                 parsed_next_href
._replace
(query
=compat_urllib_parse_urlencode(qs
, True))) 
 419             'title': playlist_title
, 
 424 class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE
): 
 425     _VALID_URL 
= r
'''(?x) 
 427                             (?:(?:www|m)\.)?soundcloud\.com/ 
 430                                 (?P<rsrc>tracks|sets|reposts|likes|spotlight) 
 434     IE_NAME 
= 'soundcloud:user' 
 436         'url': 'https://soundcloud.com/the-akashic-chronicler', 
 439             'title': 'The Akashic Chronicler (All)', 
 441         'playlist_mincount': 74, 
 443         'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', 
 446             'title': 'The Akashic Chronicler (Tracks)', 
 448         'playlist_mincount': 37, 
 450         'url': 'https://soundcloud.com/the-akashic-chronicler/sets', 
 453             'title': 'The Akashic Chronicler (Playlists)', 
 455         'playlist_mincount': 2, 
 457         'url': 'https://soundcloud.com/the-akashic-chronicler/reposts', 
 460             'title': 'The Akashic Chronicler (Reposts)', 
 462         'playlist_mincount': 7, 
 464         'url': 'https://soundcloud.com/the-akashic-chronicler/likes', 
 467             'title': 'The Akashic Chronicler (Likes)', 
 469         'playlist_mincount': 321, 
 471         'url': 'https://soundcloud.com/grynpyret/spotlight', 
 474             'title': 'Grynpyret (Spotlight)', 
 476         'playlist_mincount': 1, 
 480         'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE
._API
_V
2_BASE
, 
 481         'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE
._API
_BASE
, 
 482         'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE
._API
_V
2_BASE
, 
 483         'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE
._API
_V
2_BASE
, 
 484         'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE
._API
_V
2_BASE
, 
 485         'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE
._API
_V
2_BASE
, 
 492         'reposts': 'Reposts', 
 494         'spotlight': 'Spotlight', 
 497     def _real_extract(self
, url
): 
 498         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 499         uploader 
= mobj
.group('user') 
 501         url 
= 'https://soundcloud.com/%s/' % uploader
 
 502         resolv_url 
= self
._resolv
_url
(url
) 
 503         user 
= self
._download
_json
( 
 504             resolv_url
, uploader
, 'Downloading user info') 
 506         resource 
= mobj
.group('rsrc') or 'all' 
 508         return self
._extract
_playlist
( 
 509             self
._BASE
_URL
_MAP
[resource
] % user
['id'], compat_str(user
['id']), 
 510             '%s (%s)' % (user
['username'], self
._TITLE
_MAP
[resource
])) 
 513 class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE
): 
 514     _VALID_URL 
= r
'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)' 
 515     IE_NAME 
= 'soundcloud:trackstation' 
 517         'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', 
 520             'title': 'Track station: your-text', 
 522         'playlist_mincount': 47, 
 525     def _real_extract(self
, url
): 
 526         track_name 
= self
._match
_id
(url
) 
 528         webpage 
= self
._download
_webpage
(url
, track_name
) 
 530         track_id 
= self
._search
_regex
( 
 531             r
'soundcloud:track-stations:(\d+)', webpage
, 'track id') 
 533         return self
._extract
_playlist
( 
 534             '%s/stations/soundcloud:track-stations:%s/tracks' 
 535             % (self
._API
_V
2_BASE
, track_id
), 
 536             track_id
, 'Track station: %s' % track_name
) 
 539 class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE
): 
 540     _VALID_URL 
= r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' 
 541     IE_NAME 
= 'soundcloud:playlist' 
 543         'url': 'https://api.soundcloud.com/playlists/4110309', 
 546             'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', 
 547             'description': 're:.*?TILT Brass - Bowery Poetry Club', 
 552     def _real_extract(self
, url
): 
 553         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 554         playlist_id 
= mobj
.group('id') 
 555         base_url 
= '%s//api.soundcloud.com/playlists/%s.json?' % (self
.http_scheme(), playlist_id
) 
 558             'client_id': self
._CLIENT
_ID
, 
 560         token 
= mobj
.group('token') 
 563             data_dict
['secret_token'] = token
 
 565         data 
= compat_urllib_parse_urlencode(data_dict
) 
 566         data 
= self
._download
_json
( 
 567             base_url 
+ data
, playlist_id
, 'Downloading playlist') 
 569         entries 
= self
._extract
_track
_entries
(data
['tracks']) 
 574             'title': data
.get('title'), 
 575             'description': data
.get('description'), 
 580 class SoundcloudSearchIE(SearchInfoExtractor
, SoundcloudIE
): 
 581     IE_NAME 
= 'soundcloud:search' 
 582     IE_DESC 
= 'Soundcloud search' 
 583     _MAX_RESULTS 
= float('inf') 
 585         'url': 'scsearch15:post-avant jazzcore', 
 587             'title': 'post-avant jazzcore', 
 589         'playlist_count': 15, 
 592     _SEARCH_KEY 
= 'scsearch' 
 593     _MAX_RESULTS_PER_PAGE 
= 200 
 594     _DEFAULT_RESULTS_PER_PAGE 
= 50 
 595     _API_V2_BASE 
= 'https://api-v2.soundcloud.com' 
 597     def _get_collection(self
, endpoint
, collection_id
, **query
): 
 599             query
.get('limit', self
._DEFAULT
_RESULTS
_PER
_PAGE
), 
 600             self
._MAX
_RESULTS
_PER
_PAGE
) 
 601         query
['limit'] = limit
 
 602         query
['client_id'] = self
._CLIENT
_ID
 
 603         query
['linked_partitioning'] = '1' 
 605         data 
= compat_urllib_parse_urlencode(query
) 
 606         next_url 
= '{0}{1}?{2}'.format(self
._API
_V
2_BASE
, endpoint
, data
) 
 608         collected_results 
= 0 
 610         for i 
in itertools
.count(1): 
 611             response 
= self
._download
_json
( 
 612                 next_url
, collection_id
, 'Downloading page {0}'.format(i
), 
 613                 'Unable to download API page') 
 615             collection 
= response
.get('collection', []) 
 619             collection 
= list(filter(bool, collection
)) 
 620             collected_results 
+= len(collection
) 
 622             for item 
in collection
: 
 623                 yield self
.url_result(item
['uri'], SoundcloudIE
.ie_key()) 
 625             if not collection 
or collected_results 
>= limit
: 
 628             next_url 
= response
.get('next_href') 
 632     def _get_n_results(self
, query
, n
): 
 633         tracks 
= self
._get
_collection
('/search/tracks', query
, limit
=n
, q
=query
) 
 634         return self
.playlist_result(tracks
, playlist_title
=query
)