]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
20 class SoundcloudIE ( InfoExtractor
):
21 """Information extractor for soundcloud.com
22 To access the media, the uid of the song and a stream token
23 must be extracted from the page source and the script must make
24 a request to media.soundcloud.com/crossdomain.xml. Then
25 the media can be grabbed by requesting from an url composed
26 of the stream token and uid
29 _VALID_URL
= r
'''(?x)^(?:https?://)?
30 (?:(?:(?:www\.|m\.)?soundcloud\.com/
31 (?P<uploader>[\w\d-]+)/
32 (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
34 (?P<token>[^?]+?)?(?:[?].*)?$)
35 |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
36 (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
37 |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
40 IE_NAME
= 'soundcloud'
43 'url' : 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,
44 'md5' : 'ebef0a451b909710ed1d7787dddbf0d7' ,
48 'upload_date' : '20121011' ,
49 'description' : 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o \' d' ,
50 'uploader' : 'E.T. ExTerrestrial Music' ,
51 'title' : 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1' ,
57 'url' : 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,
61 'title' : 'Goldrushed' ,
62 'description' : 'From Stockholm Sweden \r\n Povel / Magnus / Filip / David \r\n www.theroyalconcept.com' ,
63 'uploader' : 'The Royal Concept' ,
64 'upload_date' : '20120521' ,
69 'skip_download' : True ,
74 'url' : 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp' ,
75 'md5' : 'aa0dd32bfea9b0c5ef4f02aacd080604' ,
79 'title' : 'Youtube - Dl Test Video \'\' Ä↭' ,
80 'uploader' : 'jaimeMF' ,
81 'description' : 'test chars: \"\' / \\ ä↭' ,
82 'upload_date' : '20131209' ,
86 # private link (alt format)
88 'url' : 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp' ,
89 'md5' : 'aa0dd32bfea9b0c5ef4f02aacd080604' ,
93 'title' : 'Youtube - Dl Test Video \'\' Ä↭' ,
94 'uploader' : 'jaimeMF' ,
95 'description' : 'test chars: \"\' / \\ ä↭' ,
96 'upload_date' : '20131209' ,
102 'url' : 'https://soundcloud.com/oddsamples/bus-brakes' ,
103 'md5' : '7624f2351f8a3b2e7cd51522496e7631' ,
107 'title' : 'Bus Brakes' ,
108 'description' : 'md5:0053ca6396e8d2fd7b7e1595ef12ab66' ,
109 'uploader' : 'oddsamples' ,
110 'upload_date' : '20140109' ,
116 _CLIENT_ID
= '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
117 _IPHONE_CLIENT_ID
= '376f225bf427445fc4bfb6b99b72e0bf'
def report_resolve(self, video_id):
    """Report that an id is being resolved.

    Emits a single status line through ``self.to_screen``.

    Args:
        video_id: Value interpolated into the status message (callers in
            this file pass the "uploader/slug" style title).
    """
    # The scraped listing showed the placeholder padded with spaces
    # (' %s : Resolving id'); the same padding appears around every
    # placeholder in the extract, including inside URLs, so it is treated
    # as extraction noise and removed here.
    self.to_screen('%s: Resolving id' % video_id)
124 def _resolv_url ( cls
, url
):
125 return 'http://api.soundcloud.com/resolve.json?url=' + url
+ '&client_id=' + cls
._ CLIENT
_ ID
127 def _extract_info_dict ( self
, info
, full_title
= None , quiet
= False , secret_token
= None ):
128 track_id
= compat_str ( info
[ 'id' ])
129 name
= full_title
or track_id
131 self
. report_extraction ( name
)
133 thumbnail
= info
[ 'artwork_url' ]
134 if thumbnail
is not None :
135 thumbnail
= thumbnail
. replace ( '-large' , '-t500x500' )
139 'uploader' : info
[ 'user' ][ 'username' ],
140 'upload_date' : unified_strdate ( info
[ 'created_at' ]),
141 'title' : info
[ 'title' ],
142 'description' : info
[ 'description' ],
143 'thumbnail' : thumbnail
,
144 'duration' : int_or_none ( info
. get ( 'duration' ), 1000 ),
145 'webpage_url' : info
. get ( 'permalink_url' ),
148 if info
. get ( 'downloadable' , False ):
149 # We can build a direct link to the song
151 'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (
152 track_id
, self
._ CLIENT
_ ID
))
154 'format_id' : 'download' ,
155 'ext' : info
. get ( 'original_format' , 'mp3' ),
161 # We have to retrieve the url
162 streams_url
= ( 'http://api.soundcloud.com/i1/tracks/ {0} /streams?'
163 'client_id= {1} &secret_token= {2} ' . format ( track_id
, self
._ IPHONE
_ CLIENT
_ ID
, secret_token
))
164 format_dict
= self
._ download
_ json
(
166 track_id
, 'Downloading track url' )
168 for key
, stream_url
in format_dict
. items ():
169 if key
. startswith ( 'http' ):
176 elif key
. startswith ( 'rtmp' ):
177 # The url doesn't have an rtmp app, we have to extract the playpath
178 url
, path
= stream_url
. split ( 'mp3:' , 1 )
182 'play_path' : 'mp3:' + path
,
188 # We fallback to the stream_url in the original info, this
189 # cannot be always used, sometimes it can give an HTTP 404 error
191 'format_id' : 'fallback' ,
192 'url' : info
[ 'stream_url' ] + '?client_id=' + self
._ CLIENT
_ ID
,
198 if f
[ 'format_id' ]. startswith ( 'http' ):
199 f
[ 'protocol' ] = 'http'
200 if f
[ 'format_id' ]. startswith ( 'rtmp' ):
201 f
[ 'protocol' ] = 'rtmp'
203 self
._ check
_ formats
( formats
, track_id
)
204 self
._ sort
_ formats
( formats
)
205 result
[ 'formats' ] = formats
209 def _real_extract ( self
, url
):
210 mobj
= re
. match ( self
._ VALID
_U RL
, url
, flags
= re
. VERBOSE
)
212 raise ExtractorError ( 'Invalid URL: %s ' % url
)
214 track_id
= mobj
. group ( 'track_id' )
216 if track_id
is not None :
217 info_json_url
= 'http://api.soundcloud.com/tracks/' + track_id
+ '.json?client_id=' + self
._ CLIENT
_ ID
218 full_title
= track_id
219 token
= mobj
. group ( 'secret_token' )
221 info_json_url
+= "&secret_token=" + token
222 elif mobj
. group ( 'player' ):
223 query
= compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)
224 real_url
= query
[ 'url' ][ 0 ]
225 # If the token is in the query of the original url we have to
227 if 'secret_token' in query
:
228 real_url
+= '?secret_token=' + query
[ 'secret_token' ][ 0 ]
229 return self
. url_result ( real_url
)
231 # extract uploader (which is in the url)
232 uploader
= mobj
. group ( 'uploader' )
233 # extract simple title (uploader + slug of song title)
234 slug_title
= mobj
. group ( 'title' )
235 token
= mobj
. group ( 'token' )
236 full_title
= resolve_title
= ' %s / %s ' % ( uploader
, slug_title
)
238 resolve_title
+= '/ %s ' % token
240 self
. report_resolve ( full_title
)
242 url
= 'http://soundcloud.com/ %s ' % resolve_title
243 info_json_url
= self
._ resolv
_u rl
( url
)
244 info
= self
._ download
_ json
( info_json_url
, full_title
, 'Downloading info JSON' )
246 return self
._ extract
_ info
_ dict
( info
, full_title
, secret_token
= token
)
249 class SoundcloudSetIE ( SoundcloudIE
):
250 _VALID_URL
= r
'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
251 IE_NAME
= 'soundcloud:set'
253 'url' : 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep' ,
256 'title' : 'The Royal Concept EP' ,
258 'playlist_mincount' : 6 ,
261 def _real_extract ( self
, url
):
262 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
264 # extract uploader (which is in the url)
265 uploader
= mobj
. group ( 'uploader' )
266 # extract simple title (uploader + slug of song title)
267 slug_title
= mobj
. group ( 'slug_title' )
268 full_title
= ' %s /sets/ %s ' % ( uploader
, slug_title
)
269 url
= 'http://soundcloud.com/ %s /sets/ %s ' % ( uploader
, slug_title
)
271 token
= mobj
. group ( 'token' )
273 full_title
+= '/' + token
276 self
. report_resolve ( full_title
)
278 resolv_url
= self
._ resolv
_u rl
( url
)
279 info
= self
._ download
_ json
( resolv_url
, full_title
)
282 msgs
= ( compat_str ( err
[ 'error_message' ]) for err
in info
[ 'errors' ])
283 raise ExtractorError ( 'unable to download video webpage: %s ' % ',' . join ( msgs
))
285 entries
= [ self
. url_result ( track
[ 'permalink_url' ], 'Soundcloud' ) for track
in info
[ 'tracks' ]]
290 'id' : ' %s ' % info
[ 'id' ],
291 'title' : info
[ 'title' ],
295 class SoundcloudUserIE ( SoundcloudIE
):
296 _VALID_URL
= r
'''(?x)
298 (?:(?:www|m)\.)?soundcloud\.com/
301 (?P<rsrc>tracks|sets|reposts|likes|spotlight)
305 IE_NAME
= 'soundcloud:user'
307 'url' : 'https://soundcloud.com/the-akashic-chronicler' ,
310 'title' : 'The Akashic Chronicler (All)' ,
312 'playlist_mincount' : 111 ,
314 'url' : 'https://soundcloud.com/the-akashic-chronicler/tracks' ,
317 'title' : 'The Akashic Chronicler (Tracks)' ,
319 'playlist_mincount' : 50 ,
321 'url' : 'https://soundcloud.com/the-akashic-chronicler/sets' ,
324 'title' : 'The Akashic Chronicler (Playlists)' ,
326 'playlist_mincount' : 3 ,
328 'url' : 'https://soundcloud.com/the-akashic-chronicler/reposts' ,
331 'title' : 'The Akashic Chronicler (Reposts)' ,
333 'playlist_mincount' : 7 ,
335 'url' : 'https://soundcloud.com/the-akashic-chronicler/likes' ,
338 'title' : 'The Akashic Chronicler (Likes)' ,
340 'playlist_mincount' : 321 ,
342 'url' : 'https://soundcloud.com/grynpyret/spotlight' ,
345 'title' : 'Grynpyret (Spotlight)' ,
347 'playlist_mincount' : 1 ,
350 _API_BASE
= 'https://api.soundcloud.com'
351 _API_V2_BASE
= 'https://api-v2.soundcloud.com'
354 'all' : ' %s /profile/soundcloud:users: %%s ' % _API_V2_BASE
,
355 'tracks' : ' %s /users/ %%s /tracks' % _API_BASE
,
356 'sets' : ' %s /users/ %%s /playlists' % _API_V2_BASE
,
357 'reposts' : ' %s /profile/soundcloud:users: %%s /reposts' % _API_V2_BASE
,
358 'likes' : ' %s /users/ %%s /likes' % _API_V2_BASE
,
359 'spotlight' : ' %s /users/ %%s /spotlight' % _API_V2_BASE
,
366 'reposts' : 'Reposts' ,
368 'spotlight' : 'Spotlight' ,
371 def _real_extract ( self
, url
):
372 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
373 uploader
= mobj
. group ( 'user' )
375 url
= 'http://soundcloud.com/ %s /' % uploader
376 resolv_url
= self
._ resolv
_u rl
( url
)
377 user
= self
._ download
_ json
(
378 resolv_url
, uploader
, 'Downloading user info' )
380 resource
= mobj
. group ( 'rsrc' ) or 'all'
381 base_url
= self
._ BASE
_U RL
_ MAP
[ resource
] % user
[ 'id' ]
386 for i
in itertools
. count ():
388 data
= compat_urllib_parse
. urlencode ({
391 'client_id' : self
._ CLIENT
_ ID
,
392 'linked_partitioning' : '1' ,
393 'representation' : 'speedy' ,
395 next_href
= base_url
+ '?' + data
397 response
= self
._ download
_ json
(
398 next_href
, uploader
, 'Downloading track page %s ' % ( i
+ 1 ))
400 collection
= response
[ 'collection' ]
403 self
. to_screen ( ' %s : End page received' % uploader
)
406 def resolve_permalink_url ( candidates
):
407 for cand
in candidates
:
408 if isinstance ( cand
, dict ):
409 permalink_url
= cand
. get ( 'permalink_url' )
410 if permalink_url
and permalink_url
. startswith ( 'http' ):
414 permalink_url
= resolve_permalink_url (( e
, e
. get ( 'track' ), e
. get ( 'playlist' )))
416 entries
. append ( self
. url_result ( permalink_url
))
418 if 'next_href' in response
:
419 next_href
= response
[ 'next_href' ]
427 'id' : compat_str ( user
[ 'id' ]),
428 'title' : ' %s ( %s )' % ( user
[ 'username' ], self
._ TITLE
_ MAP
[ resource
]),
433 class SoundcloudPlaylistIE ( SoundcloudIE
):
434 _VALID_URL
= r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
435 IE_NAME
= 'soundcloud:playlist'
437 'url' : 'http://api.soundcloud.com/playlists/4110309' ,
440 'title' : 'TILT Brass - Bowery Poetry Club, August \' 03 [Non-Site SCR 02]' ,
441 'description' : 're:.*?TILT Brass - Bowery Poetry Club' ,
446 def _real_extract ( self
, url
):
447 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
448 playlist_id
= mobj
. group ( 'id' )
449 base_url
= ' %s //api.soundcloud.com/playlists/ %s .json?' % ( self
. http_scheme (), playlist_id
)
452 'client_id' : self
._ CLIENT
_ ID
,
454 token
= mobj
. group ( 'token' )
457 data_dict
[ 'secret_token' ] = token
459 data
= compat_urllib_parse
. urlencode ( data_dict
)
460 data
= self
._ download
_ json
(
461 base_url
+ data
, playlist_id
, 'Downloading playlist' )
463 entries
= [ self
. url_result ( track
[ 'permalink_url' ], 'Soundcloud' ) for track
in data
[ 'tracks' ]]
468 'title' : data
. get ( 'title' ),
469 'description' : data
. get ( 'description' ),