]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py
2 from __future__
import unicode_literals
8 from . common
import InfoExtractor
19 class SoundcloudIE ( InfoExtractor
):
20 """Information extractor for soundcloud.com
21 To access the media, the uid of the song and a stream token
22 must be extracted from the page source and the script must make
23 a request to media.soundcloud.com/crossdomain.xml. Then
24 the media can be grabbed by requesting from an url composed
25 of the stream token and uid
28 _VALID_URL
= r
'''^(?:https?://)?
29 (?:(?:(?:www\.|m\.)?soundcloud\.com/
30 (?P<uploader>[\w\d-]+)/
31 (?!sets/)(?P<title>[\w\d-]+)/?
32 (?P<token>[^?]+?)?(?:[?].*)?$)
33 |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
34 |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
37 IE_NAME
= 'soundcloud'
40 'url' : 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,
41 'file' : '62986583.mp3' ,
42 'md5' : 'ebef0a451b909710ed1d7787dddbf0d7' ,
44 "upload_date" : "20121011" ,
45 "description" : "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd" ,
46 "uploader" : "E.T. ExTerrestrial Music" ,
47 "title" : "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
52 'url' : 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,
56 'title' : 'Goldrushed' ,
57 'uploader' : 'The Royal Concept' ,
58 'upload_date' : '20120521' ,
62 'skip_download' : True ,
67 'url' : 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp' ,
68 'md5' : 'aa0dd32bfea9b0c5ef4f02aacd080604' ,
72 'title' : 'Youtube - Dl Test Video \'\' Ä↭' ,
73 'uploader' : 'jaimeMF' ,
74 'description' : 'test chars: \"\' / \\ ä↭' ,
75 'upload_date' : '20131209' ,
80 'url' : 'https://soundcloud.com/simgretina/just-your-problem-baby-1' ,
81 'md5' : '56a8b69568acaa967b4c49f9d1d52d19' ,
85 'title' : 'Just Your Problem Baby (Acapella)' ,
86 'description' : 'Vocals' ,
87 'uploader' : 'Sim Gretina' ,
88 'upload_date' : '20130815' ,
93 _CLIENT_ID
= 'b45b1aa10f1ac2941910a7f0d10f8e28'
94 _IPHONE_CLIENT_ID
= '376f225bf427445fc4bfb6b99b72e0bf'
def suitable(cls, url):
    """Return True when ``url`` matches this extractor's ``_VALID_URL``.

    The pattern is matched with ``re.VERBOSE`` because ``_VALID_URL`` is a
    multi-line pattern whose layout whitespace must be ignored.

    NOTE(review): the first parameter is ``cls``, so this is presumably
    decorated with ``@classmethod``; the decorator line is not visible in
    this view -- confirm it is present above this definition.
    """
    return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
def report_resolve(self, video_id):
    """Report that ``video_id`` is being resolved.

    Writes a single "<video_id>: Resolving id" message through
    ``self.to_screen``; nothing is returned.
    """
    self.to_screen(u'%s: Resolving id' % video_id)
105 def _resolv_url ( cls
, url
):
106 return 'http://api.soundcloud.com/resolve.json?url=' + url
+ '&client_id=' + cls
._ CLIENT
_ ID
108 def _extract_info_dict ( self
, info
, full_title
= None , quiet
= False , secret_token
= None ):
109 track_id
= compat_str ( info
[ 'id' ])
110 name
= full_title
or track_id
112 self
. report_extraction ( name
)
114 thumbnail
= info
[ 'artwork_url' ]
115 if thumbnail
is not None :
116 thumbnail
= thumbnail
. replace ( '-large' , '-t500x500' )
120 'uploader' : info
[ 'user' ][ 'username' ],
121 'upload_date' : unified_strdate ( info
[ 'created_at' ]),
122 'title' : info
[ 'title' ],
123 'description' : info
[ 'description' ],
124 'thumbnail' : thumbnail
,
126 if info
. get ( 'downloadable' , False ):
127 # We can build a direct link to the song
129 'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (
130 track_id
, self
._ CLIENT
_ ID
))
131 result
[ 'formats' ] = [{
132 'format_id' : 'download' ,
133 'ext' : info
. get ( 'original_format' , 'mp3' ),
138 # We have to retrieve the url
139 streams_url
= ( 'http://api.soundcloud.com/i1/tracks/ {0} /streams?'
140 'client_id= {1} &secret_token= {2} ' . format ( track_id
, self
._ IPHONE
_ CLIENT
_ ID
, secret_token
))
141 stream_json
= self
._ download
_ webpage
(
143 track_id
, 'Downloading track url' )
146 format_dict
= json
. loads ( stream_json
)
147 for key
, stream_url
in format_dict
. items ():
148 if key
. startswith ( u
'http' ):
155 elif key
. startswith ( u
'rtmp' ):
156 # The url doesn't have an rtmp app, we have to extract the playpath
157 url
, path
= stream_url
. split ( 'mp3:' , 1 )
161 'play_path' : 'mp3:' + path
,
167 # We fallback to the stream_url in the original info, this
168 # cannot be always used, sometimes it can give an HTTP 404 error
170 'format_id' : 'fallback' ,
171 'url' : info
[ 'stream_url' ] + '?client_id=' + self
._ CLIENT
_ ID
,
177 if f
[ 'format_id' ]. startswith ( 'http' ):
178 f
[ 'protocol' ] = 'http'
179 if f
[ 'format_id' ]. startswith ( 'rtmp' ):
180 f
[ 'protocol' ] = 'rtmp'
182 self
._ sort
_ formats
( formats
)
183 result
[ 'formats' ] = formats
187 def _real_extract ( self
, url
):
188 mobj
= re
. match ( self
._ VALID
_U RL
, url
, flags
= re
. VERBOSE
)
190 raise ExtractorError ( u
'Invalid URL: %s ' % url
)
192 track_id
= mobj
. group ( 'track_id' )
194 if track_id
is not None :
195 info_json_url
= 'http://api.soundcloud.com/tracks/' + track_id
+ '.json?client_id=' + self
._ CLIENT
_ ID
196 full_title
= track_id
197 elif mobj
. group ( 'player' ):
198 query
= compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)
199 return self
. url_result ( query
[ 'url' ][ 0 ], ie
= 'Soundcloud' )
201 # extract uploader (which is in the url)
202 uploader
= mobj
. group ( 'uploader' )
203 # extract simple title (uploader + slug of song title)
204 slug_title
= mobj
. group ( 'title' )
205 token
= mobj
. group ( 'token' )
206 full_title
= resolve_title
= ' %s / %s ' % ( uploader
, slug_title
)
208 resolve_title
+= '/ %s ' % token
210 self
. report_resolve ( full_title
)
212 url
= 'http://soundcloud.com/ %s ' % resolve_title
213 info_json_url
= self
._ resolv
_u rl
( url
)
214 info_json
= self
._ download
_ webpage
( info_json_url
, full_title
, 'Downloading info JSON' )
216 info
= json
. loads ( info_json
)
217 return self
._ extract
_ info
_ dict
( info
, full_title
, secret_token
= token
)
219 class SoundcloudSetIE ( SoundcloudIE
):
220 _VALID_URL
= r
'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
221 IE_NAME
= 'soundcloud:set'
222 # it's in tests/test_playlists.py
225 def _real_extract ( self
, url
):
226 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
228 raise ExtractorError ( u
'Invalid URL: %s ' % url
)
230 # extract uploader (which is in the url)
231 uploader
= mobj
. group ( 1 )
232 # extract simple title (uploader + slug of song title)
233 slug_title
= mobj
. group ( 2 )
234 full_title
= ' %s /sets/ %s ' % ( uploader
, slug_title
)
236 self
. report_resolve ( full_title
)
238 url
= 'http://soundcloud.com/ %s /sets/ %s ' % ( uploader
, slug_title
)
239 resolv_url
= self
._ resolv
_u rl
( url
)
240 info_json
= self
._ download
_ webpage
( resolv_url
, full_title
)
242 info
= json
. loads ( info_json
)
244 for err
in info
[ 'errors' ]:
245 self
._ downloader
. report_error ( u
'unable to download video webpage: %s ' % compat_str ( err
[ 'error_message' ]))
248 self
. report_extraction ( full_title
)
249 return { '_type' : 'playlist' ,
250 'entries' : [ self
._ extract
_ info
_ dict
( track
) for track
in info
[ 'tracks' ]],
252 'title' : info
[ 'title' ],
256 class SoundcloudUserIE ( SoundcloudIE
):
257 _VALID_URL
= r
'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
258 IE_NAME
= 'soundcloud:user'
260 # it's in tests/test_playlists.py
263 def _real_extract ( self
, url
):
264 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
265 uploader
= mobj
. group ( 'user' )
267 url
= 'http://soundcloud.com/ %s /' % uploader
268 resolv_url
= self
._ resolv
_u rl
( url
)
269 user_json
= self
._ download
_ webpage
( resolv_url
, uploader
,
270 'Downloading user info' )
271 user
= json
. loads ( user_json
)
274 for i
in itertools
. count ():
275 data
= compat_urllib_parse
. urlencode ({ 'offset' : i
* 50 ,
276 'client_id' : self
._ CLIENT
_ ID
,
278 tracks_url
= 'http://api.soundcloud.com/users/ %s /tracks.json?' % user
[ 'id' ] + data
279 response
= self
._ download
_ webpage
( tracks_url
, uploader
,
280 'Downloading tracks page %s ' % ( i
+ 1 ))
281 new_tracks
= json
. loads ( response
)
282 tracks
. extend ( self
._ extract
_ info
_ dict
( track
, quiet
= True ) for track
in new_tracks
)
283 if len ( new_tracks
) < 50 :
288 'id' : compat_str ( user
[ 'id' ]),
289 'title' : user
[ 'username' ],