]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py
5 from . common
import InfoExtractor
16 class SoundcloudIE ( InfoExtractor
):
17 """Information extractor for soundcloud.com
18 To access the media, the uid of the song and a stream token
19 must be extracted from the page source and the script must make
20 a request to media.soundcloud.com/crossdomain.xml. Then
21 the media can be grabbed by requesting from an url composed
22 of the stream token and uid
25 _VALID_URL
= r
'''^(?:https?://)?
26 (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
27 |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
28 |(?P<widget>w.soundcloud.com/player/?.*?url=.*)
31 IE_NAME
= u
'soundcloud'
34 u
'url' : u
'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,
35 u
'file' : u
'62986583.mp3' ,
36 u
'md5' : u
'ebef0a451b909710ed1d7787dddbf0d7' ,
38 u
"upload_date" : u
"20121011" ,
39 u
"description" : u
"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd" ,
40 u
"uploader" : u
"E.T. ExTerrestrial Music" ,
41 u
"title" : u
"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
46 u
'url' : u
'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,
50 u
'title' : u
'Goldrushed' ,
51 u
'uploader' : u
'The Royal Concept' ,
52 u
'upload_date' : u
'20120521' ,
56 u
'skip_download' : True ,
61 _CLIENT_ID
= 'b45b1aa10f1ac2941910a7f0d10f8e28'
62 _IPHONE_CLIENT_ID
= '376f225bf427445fc4bfb6b99b72e0bf'
def suitable(cls, url):
    """Return True when *url* matches this extractor's ``_VALID_URL``.

    The pattern is applied with ``re.VERBOSE``, so ``_VALID_URL`` may be
    written across several lines with insignificant whitespace.
    """
    # NOTE(review): the first parameter is `cls`, so this is presumably
    # wrapped in @classmethod; no decorator is visible in this view - confirm.
    return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
def report_resolve(self, video_id):
    """Announce through ``self.to_screen`` that *video_id* is being resolved."""
    self.to_screen(u'%s: Resolving id' % video_id)
def _resolv_url(cls, url):
    """Build the ``resolve.json`` API URL that looks up *url*.

    *url* is appended verbatim (it is not percent-encoded here), followed by
    the ``client_id`` query parameter taken from ``cls._CLIENT_ID``.
    """
    # NOTE(review): the first parameter is `cls`, so this is presumably
    # wrapped in @classmethod; no decorator is visible in this view - confirm.
    return ('http://api.soundcloud.com/resolve.json?url=' + url
            + '&client_id=' + cls._CLIENT_ID)
76 def _extract_info_dict ( self
, info
, full_title
= None , quiet
= False ):
77 track_id
= compat_str ( info
[ 'id' ])
78 name
= full_title
or track_id
80 self
. report_extraction ( name
)
82 thumbnail
= info
[ 'artwork_url' ]
83 if thumbnail
is not None :
84 thumbnail
= thumbnail
. replace ( '-large' , '-t500x500' )
85 ext
= info
. get ( 'original_format' , u
'mp3' )
88 'uploader' : info
[ 'user' ][ 'username' ],
89 'upload_date' : unified_strdate ( info
[ 'created_at' ]),
90 'title' : info
[ 'title' ],
91 'description' : info
[ 'description' ],
92 'thumbnail' : thumbnail
,
94 if info
. get ( 'downloadable' , False ):
95 # We can build a direct link to the song
97 u
'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (
98 track_id
, self
._ CLIENT
_ ID
))
99 result
[ 'formats' ] = [{
100 'format_id' : 'download' ,
106 # We have to retrieve the url
107 stream_json
= self
._ download
_ webpage
(
108 'http://api.soundcloud.com/i1/tracks/ {0} /streams?client_id= {1} ' . format ( track_id
, self
._ IPHONE
_ CLIENT
_ ID
),
109 track_id
, u
'Downloading track url' )
112 format_dict
= json
. loads ( stream_json
)
113 for key
, stream_url
in format_dict
. items ():
114 if key
. startswith ( u
'http' ):
121 elif key
. startswith ( u
'rtmp' ):
122 # The url doesn't have an rtmp app, we have to extract the playpath
123 url
, path
= stream_url
. split ( 'mp3:' , 1 )
127 'play_path' : 'mp3:' + path
,
133 # We fallback to the stream_url in the original info, this
134 # cannot be always used, sometimes it can give an HTTP 404 error
136 'format_id' : u
'fallback' ,
137 'url' : info
[ 'stream_url' ] + '?client_id=' + self
._ CLIENT
_ ID
,
143 if f
[ 'format_id' ]. startswith ( 'http' ):
145 if f
[ 'format_id' ]. startswith ( 'rtmp' ):
149 formats
. sort ( key
= format_pref
)
150 result
[ 'formats' ] = formats
def _real_extract(self, url):
    """Turn a URL matched by ``_VALID_URL`` into an info dict.

    Three URL shapes are handled, in this order:

    * a ``track_id`` match - the track JSON is fetched directly by id;
    * a ``widget`` match - the embedded ``url`` query parameter is handed
      back through ``self.url_result`` for a second extraction pass;
    * otherwise - positional groups 1 and 2 (uploader and title slug) are
      used to build a canonical page URL, which is passed to
      ``self._resolv_url``.

    Raises ``ExtractorError`` when *url* does not match ``_VALID_URL``.
    """
    mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
    # NOTE(review): this guard line is not visible in the extracted source;
    # it is inferred from the otherwise unconditional raise - confirm.
    if mobj is None:
        raise ExtractorError(u'Invalid URL: %s' % url)

    track_id = mobj.group('track_id')
    if track_id is not None:
        info_json_url = ('http://api.soundcloud.com/tracks/' + track_id
                         + '.json?client_id=' + self._CLIENT_ID)
        full_title = track_id
    elif mobj.group('widget'):
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        return self.url_result(query['url'][0], ie='Soundcloud')
    # NOTE(review): the `else:` header is not visible in the extracted
    # source; it is inferred because groups 1 and 2 only apply to the
    # remaining URL shape - confirm.
    else:
        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # extract simple title (uploader + slug of song title)
        slug_title = mobj.group(2)
        full_title = '%s/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
        info_json_url = self._resolv_url(url)

    info_json = self._download_webpage(
        info_json_url, full_title, u'Downloading info JSON')

    info = json.loads(info_json)
    return self._extract_info_dict(info, full_title)
182 class SoundcloudSetIE ( SoundcloudIE
):
183 _VALID_URL
= r
'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
184 IE_NAME
= u
'soundcloud:set'
185 # it's in tests/test_playlists.py
188 def _real_extract ( self
, url
):
189 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
191 raise ExtractorError ( u
'Invalid URL: %s ' % url
)
193 # extract uploader (which is in the url)
194 uploader
= mobj
. group ( 1 )
195 # extract simple title (uploader + slug of song title)
196 slug_title
= mobj
. group ( 2 )
197 full_title
= ' %s /sets/ %s ' % ( uploader
, slug_title
)
199 self
. report_resolve ( full_title
)
201 url
= 'http://soundcloud.com/ %s /sets/ %s ' % ( uploader
, slug_title
)
202 resolv_url
= self
._ resolv
_u rl
( url
)
203 info_json
= self
._ download
_ webpage
( resolv_url
, full_title
)
205 info
= json
. loads ( info_json
)
207 for err
in info
[ 'errors' ]:
208 self
._ downloader
. report_error ( u
'unable to download video webpage: %s ' % compat_str ( err
[ 'error_message' ]))
211 self
. report_extraction ( full_title
)
212 return { '_type' : 'playlist' ,
213 'entries' : [ self
._ extract
_ info
_ dict
( track
) for track
in info
[ 'tracks' ]],
215 'title' : info
[ 'title' ],
219 class SoundcloudUserIE ( SoundcloudIE
):
220 _VALID_URL
= r
'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
221 IE_NAME
= u
'soundcloud:user'
223 # it's in tests/test_playlists.py
226 def _real_extract ( self
, url
):
227 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
228 uploader
= mobj
. group ( 'user' )
230 url
= 'http://soundcloud.com/ %s /' % uploader
231 resolv_url
= self
._ resolv
_u rl
( url
)
232 user_json
= self
._ download
_ webpage
( resolv_url
, uploader
,
233 u
'Downloading user info' )
234 user
= json
. loads ( user_json
)
237 for i
in itertools
. count ():
238 data
= compat_urllib_parse
. urlencode ({ 'offset' : i
* 50 ,
239 'client_id' : self
._ CLIENT
_ ID
,
241 tracks_url
= 'http://api.soundcloud.com/users/ %s /tracks.json?' % user
[ 'id' ] + data
242 response
= self
._ download
_ webpage
( tracks_url
, uploader
,
243 u
'Downloading tracks page %s ' % ( i
+ 1 ))
244 new_tracks
= json
. loads ( response
)
245 tracks
. extend ( self
._ extract
_ info
_ dict
( track
, quiet
= True ) for track
in new_tracks
)
246 if len ( new_tracks
) < 50 :
251 'id' : compat_str ( user
[ 'id' ]),
252 'title' : user
[ 'username' ],