]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py
3c1d058db9cf546bb94a299d9f1badd79fab12d3
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
19 class SoundcloudIE ( InfoExtractor
):
20 """Information extractor for soundcloud.com
21 To access the media, the uid of the song and a stream token
22 must be extracted from the page source and the script must make
23 a request to media.soundcloud.com/crossdomain.xml. Then
24 the media can be grabbed by requesting from an url composed
25 of the stream token and uid
28 _VALID_URL
= r
'''(?x)^(?:https?://)?
29 (?:(?:(?:www\.|m\.)?soundcloud\.com/
30 (?P<uploader>[\w\d-]+)/
31 (?!sets/|likes/?(?:$|[?#]))
33 (?P<token>[^?]+?)?(?:[?].*)?$)
34 |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
35 (?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)
36 |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
39 IE_NAME
= 'soundcloud'
42 'url' : 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,
43 'md5' : 'ebef0a451b909710ed1d7787dddbf0d7' ,
47 'upload_date' : '20121011' ,
48 'description' : 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o \' d' ,
49 'uploader' : 'E.T. ExTerrestrial Music' ,
50 'title' : 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1' ,
56 'url' : 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,
60 'title' : 'Goldrushed' ,
61 'description' : 'From Stockholm Sweden \r\n Povel / Magnus / Filip / David \r\n www.theroyalconcept.com' ,
62 'uploader' : 'The Royal Concept' ,
63 'upload_date' : '20120521' ,
68 'skip_download' : True ,
73 'url' : 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp' ,
74 'md5' : 'aa0dd32bfea9b0c5ef4f02aacd080604' ,
78 'title' : 'Youtube - Dl Test Video \'\' Ä↭' ,
79 'uploader' : 'jaimeMF' ,
80 'description' : 'test chars: \"\' / \\ ä↭' ,
81 'upload_date' : '20131209' ,
85 # private link (alt format)
87 'url' : 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp' ,
88 'md5' : 'aa0dd32bfea9b0c5ef4f02aacd080604' ,
92 'title' : 'Youtube - Dl Test Video \'\' Ä↭' ,
93 'uploader' : 'jaimeMF' ,
94 'description' : 'test chars: \"\' / \\ ä↭' ,
95 'upload_date' : '20131209' ,
101 'url' : 'https://soundcloud.com/oddsamples/bus-brakes' ,
102 'md5' : '7624f2351f8a3b2e7cd51522496e7631' ,
106 'title' : 'Bus Brakes' ,
107 'description' : 'md5:0053ca6396e8d2fd7b7e1595ef12ab66' ,
108 'uploader' : 'oddsamples' ,
109 'upload_date' : '20140109' ,
115 _CLIENT_ID
= 'b45b1aa10f1ac2941910a7f0d10f8e28'
116 _IPHONE_CLIENT_ID
= '376f225bf427445fc4bfb6b99b72e0bf'
def report_resolve(self, video_id):
    """Report that the identifier *video_id* is being resolved.

    Emits a single ``'<video_id>: Resolving id'`` line through
    ``self.to_screen``; nothing is returned.
    """
    self.to_screen('%s: Resolving id' % video_id)
123 def _resolv_url ( cls
, url
):
124 return 'http://api.soundcloud.com/resolve.json?url=' + url
+ '&client_id=' + cls
._ CLIENT
_ ID
126 def _extract_info_dict ( self
, info
, full_title
= None , quiet
= False , secret_token
= None ):
127 track_id
= compat_str ( info
[ 'id' ])
128 name
= full_title
or track_id
130 self
. report_extraction ( name
)
132 thumbnail
= info
[ 'artwork_url' ]
133 if thumbnail
is not None :
134 thumbnail
= thumbnail
. replace ( '-large' , '-t500x500' )
138 'uploader' : info
[ 'user' ][ 'username' ],
139 'upload_date' : unified_strdate ( info
[ 'created_at' ]),
140 'title' : info
[ 'title' ],
141 'description' : info
[ 'description' ],
142 'thumbnail' : thumbnail
,
143 'duration' : int_or_none ( info
. get ( 'duration' ), 1000 ),
144 'webpage_url' : info
. get ( 'permalink_url' ),
147 if info
. get ( 'downloadable' , False ):
148 # We can build a direct link to the song
150 'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (
151 track_id
, self
._ CLIENT
_ ID
))
153 'format_id' : 'download' ,
154 'ext' : info
. get ( 'original_format' , 'mp3' ),
160 # We have to retrieve the url
161 streams_url
= ( 'http://api.soundcloud.com/i1/tracks/ {0} /streams?'
162 'client_id= {1} &secret_token= {2} ' . format ( track_id
, self
._ IPHONE
_ CLIENT
_ ID
, secret_token
))
163 format_dict
= self
._ download
_ json
(
165 track_id
, 'Downloading track url' )
167 for key
, stream_url
in format_dict
. items ():
168 if key
. startswith ( 'http' ):
175 elif key
. startswith ( 'rtmp' ):
176 # The url doesn't have an rtmp app, we have to extract the playpath
177 url
, path
= stream_url
. split ( 'mp3:' , 1 )
181 'play_path' : 'mp3:' + path
,
187 # We fallback to the stream_url in the original info, this
188 # cannot be always used, sometimes it can give an HTTP 404 error
190 'format_id' : 'fallback' ,
191 'url' : info
[ 'stream_url' ] + '?client_id=' + self
._ CLIENT
_ ID
,
197 if f
[ 'format_id' ]. startswith ( 'http' ):
198 f
[ 'protocol' ] = 'http'
199 if f
[ 'format_id' ]. startswith ( 'rtmp' ):
200 f
[ 'protocol' ] = 'rtmp'
202 self
._ sort
_ formats
( formats
)
203 result
[ 'formats' ] = formats
207 def _real_extract ( self
, url
):
208 mobj
= re
. match ( self
._ VALID
_U RL
, url
, flags
= re
. VERBOSE
)
210 raise ExtractorError ( 'Invalid URL: %s ' % url
)
212 track_id
= mobj
. group ( 'track_id' )
214 if track_id
is not None :
215 info_json_url
= 'http://api.soundcloud.com/tracks/' + track_id
+ '.json?client_id=' + self
._ CLIENT
_ ID
216 full_title
= track_id
217 token
= mobj
. group ( 'secret_token' )
219 info_json_url
+= "&secret_token=" + token
220 elif mobj
. group ( 'player' ):
221 query
= compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)
222 return self
. url_result ( query
[ 'url' ][ 0 ])
224 # extract uploader (which is in the url)
225 uploader
= mobj
. group ( 'uploader' )
226 # extract simple title (uploader + slug of song title)
227 slug_title
= mobj
. group ( 'title' )
228 token
= mobj
. group ( 'token' )
229 full_title
= resolve_title
= ' %s / %s ' % ( uploader
, slug_title
)
231 resolve_title
+= '/ %s ' % token
233 self
. report_resolve ( full_title
)
235 url
= 'http://soundcloud.com/ %s ' % resolve_title
236 info_json_url
= self
._ resolv
_u rl
( url
)
237 info
= self
._ download
_ json
( info_json_url
, full_title
, 'Downloading info JSON' )
239 return self
._ extract
_ info
_ dict
( info
, full_title
, secret_token
= token
)
242 class SoundcloudSetIE ( SoundcloudIE
):
243 _VALID_URL
= r
'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
244 IE_NAME
= 'soundcloud:set'
246 'url' : 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep' ,
248 'title' : 'The Royal Concept EP' ,
250 'playlist_mincount' : 6 ,
253 def _real_extract ( self
, url
):
254 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
256 # extract uploader (which is in the url)
257 uploader
= mobj
. group ( 'uploader' )
258 # extract simple title (uploader + slug of song title)
259 slug_title
= mobj
. group ( 'slug_title' )
260 full_title
= ' %s /sets/ %s ' % ( uploader
, slug_title
)
261 url
= 'http://soundcloud.com/ %s /sets/ %s ' % ( uploader
, slug_title
)
263 token
= mobj
. group ( 'token' )
265 full_title
+= '/' + token
268 self
. report_resolve ( full_title
)
270 resolv_url
= self
._ resolv
_u rl
( url
)
271 info
= self
._ download
_ json
( resolv_url
, full_title
)
274 for err
in info
[ 'errors' ]:
275 self
._ downloader
. report_error ( 'unable to download video webpage: %s ' % compat_str ( err
[ 'error_message' ]))
280 'entries' : [ self
._ extract
_ info
_ dict
( track
, secret_token
= token
) for track
in info
[ 'tracks' ]],
282 'title' : info
[ 'title' ],
286 class SoundcloudUserIE ( SoundcloudIE
):
287 _VALID_URL
= r
'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
288 IE_NAME
= 'soundcloud:user'
290 'url' : 'https://soundcloud.com/the-concept-band' ,
293 'title' : 'The Royal Concept' ,
295 'playlist_mincount' : 12
297 'url' : 'https://soundcloud.com/the-concept-band/likes' ,
300 'title' : 'The Royal Concept' ,
302 'playlist_mincount' : 1 ,
305 def _real_extract ( self
, url
):
306 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
307 uploader
= mobj
. group ( 'user' )
308 resource
= mobj
. group ( 'rsrc' )
311 elif resource
== 'likes' :
312 resource
= 'favorites'
314 url
= 'http://soundcloud.com/ %s /' % uploader
315 resolv_url
= self
._ resolv
_u rl
( url
)
316 user
= self
._ download
_ json
(
317 resolv_url
, uploader
, 'Downloading user info' )
318 base_url
= 'http://api.soundcloud.com/users/ %s / %s .json?' % ( uploader
, resource
)
321 for i
in itertools
. count ():
322 data
= compat_urllib_parse
. urlencode ({
325 'client_id' : self
._ CLIENT
_ ID
,
327 new_entries
= self
._ download
_ json
(
328 base_url
+ data
, uploader
, 'Downloading track page %s ' % ( i
+ 1 ))
329 if len ( new_entries
) == 0 :
330 self
. to_screen ( ' %s : End page received' % uploader
)
332 entries
. extend ( self
._ extract
_ info
_ dict
( e
, quiet
= True ) for e
in new_entries
)
336 'id' : compat_str ( user
[ 'id' ]),
337 'title' : user
[ 'username' ],
342 class SoundcloudPlaylistIE ( SoundcloudIE
):
343 _VALID_URL
= r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
344 IE_NAME
= 'soundcloud:playlist'
346 'url' : 'http://api.soundcloud.com/playlists/4110309' ,
349 'title' : 'TILT Brass - Bowery Poetry Club, August \' 03 [Non-Site SCR 02]' ,
350 'description' : 're:.*?TILT Brass - Bowery Poetry Club' ,
355 def _real_extract ( self
, url
):
356 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
357 playlist_id
= mobj
. group ( 'id' )
358 base_url
= ' %s //api.soundcloud.com/playlists/ %s .json?' % ( self
. http_scheme (), playlist_id
)
361 'client_id' : self
._ CLIENT
_ ID
,
363 token
= mobj
. group ( 'token' )
366 data_dict
[ 'secret_token' ] = token
368 data
= compat_urllib_parse
. urlencode ( data_dict
)
369 data
= self
._ download
_ json
(
370 base_url
+ data
, playlist_id
, 'Downloading playlist' )
373 self
._ extract
_ info
_ dict
( t
, quiet
= True , secret_token
= token
)
374 for t
in data
[ 'tracks' ]]
379 'title' : data
. get ( 'title' ),
380 'description' : data
. get ( 'description' ),