# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py
import json
import re

from .common import InfoExtractor
class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com.

    To access the media, the uid of the song and a stream token
    must be extracted from the page source and the script must make
    a request to media.soundcloud.com/crossdomain.xml. Then
    the media can be grabbed by requesting from a URL composed
    of the stream token and uid.

    NOTE(review): the paragraph above is the historical description. The
    code below does not scrape the page; it asks the api.soundcloud.com
    JSON endpoints for the track metadata and builds the stream URL from
    the returned ``stream_url`` plus ``_CLIENT_ID``.
    """

    # Matched with re.VERBOSE, so whitespace inside the pattern is ignored.
    # Alternative 1: a track page URL; group 1 is the uploader, group 2 the
    # track slug.  Alternative 2: an API track URL with a numeric track_id.
    _VALID_URL = r'''^(?:https?://)?
                    (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
                    )
                    '''
    IE_NAME = u'soundcloud'
    # NOTE(review): the attribute name and the nested 'info_dict' key were
    # missing from the extracted text and are reconstructed here; confirm
    # against the project's test-fixture convention.
    _TEST = {
        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
        u'file': u'62986583.mp3',
        u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
        u'info_dict': {
            u"upload_date": u"20121011",
            u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
            u"uploader": u"E.T. ExTerrestrial Music",
            u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
        }
    }

    # Client id appended to every API and stream request built below.
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    @classmethod
    def suitable(cls, url):
        """Return True if *url* matches this extractor's ``_VALID_URL``."""
        # The pattern spans several lines, hence re.VERBOSE.
        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None

    def report_resolve(self, video_id):
        """Print a message saying that *video_id* is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    @classmethod
    def _resolv_url(cls, url):
        """Return the resolve.json API URL for the public page *url*.

        *url* is concatenated as-is (no percent-encoding is applied).
        """
        return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID

    def _extract_info_dict(self, info, full_title=None):
        """Build the result dictionary for one track.

        info       -- decoded track JSON; the keys read here are 'id',
                      'artwork_url', 'stream_url', 'user' -> 'username',
                      'created_at', 'title' and 'description'.
        full_title -- optional display name used for the progress message;
                      the track id is used when it is empty or None.
        """
        # NOTE(review): this assignment was missing from the extracted text;
        # `video_id` is read on the next line, so it is derived from the
        # track's 'id' field (the fixture file names are "<id>.mp3").
        video_id = compat_str(info['id'])
        name = full_title or video_id
        self.report_extraction(name)

        thumbnail = info['artwork_url']
        if thumbnail is not None:
            # Swap the '-large' artwork variant for the '-t500x500' one.
            thumbnail = thumbnail.replace('-large', '-t500x500')

        return {
            'id': video_id,
            'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
            'uploader': info['user']['username'],
            'upload_date': unified_strdate(info['created_at']),
            'title': info['title'],
            # NOTE(review): reconstructed entry; the fixtures expect files
            # named "<id>.mp3" - confirm the extension handling.
            'ext': u'mp3',
            'description': info['description'],
            'thumbnail': thumbnail,
        }

    def _real_extract(self, url):
        """Extract a single track from *url*.

        Raises ExtractorError when *url* does not match ``_VALID_URL``.
        Returns the dictionary produced by ``_extract_info_dict``.
        """
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        track_id = mobj.group('track_id')
        if track_id is not None:
            # API URL: the numeric id addresses the track JSON directly.
            info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
            # NOTE(review): reconstructed; the id is the only name available
            # on this path and `full_title` is required further down.
            full_title = track_id
        else:
            # extract uploader (which is in the url)
            uploader = mobj.group(1)
            # extract simple title (uploader + slug of song title)
            slug_title = mobj.group(2)
            full_title = '%s/%s' % (uploader, slug_title)

            self.report_resolve(full_title)

            # Rebuild a canonical page URL and let the API resolve it.
            url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
            info_json_url = self._resolv_url(url)
        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')

        info = json.loads(info_json)
        return self._extract_info_dict(info, full_title)
class SoundcloudSetIE(SoundcloudIE):
    """Information extractor for soundcloud.com sets (playlists).

    Resolves a ``/<uploader>/sets/<slug>`` URL through the resolve.json API
    and returns a playlist whose entries are built by the inherited
    ``_extract_info_dict``.
    """

    # Group 1 is the uploader, group 2 the set slug.
    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
    IE_NAME = u'soundcloud:set'
    # NOTE(review): the attribute name and the 'playlist' / 'info_dict' keys
    # were missing from the extracted text and are reconstructed here.  The
    # whitespace around "\r\n" in the descriptions is kept exactly as it was
    # found and may be an extraction artifact - confirm before relying on it.
    _TEST = {
        u"url": "https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
        u"playlist": [
            {
                u"file": "30510138.mp3",
                u"md5": "f9136bf103901728f29e419d2c70f55d",
                u"info_dict": {
                    u"upload_date": u"20111213",
                    u"description": u"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"D-D-Dance"
                }
            },
            {
                u"file": "47127625.mp3",
                u"md5": "09b6758a018470570f8fd423c9453dd8",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"The Royal Concept - Gimme Twice"
                }
            },
            {
                u"file": "47127627.mp3",
                u"md5": "154abd4e418cea19c3b901f1e1306d9c",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"uploader": u"The Royal Concept",
                    u"title": u"Goldrushed"
                }
            },
            {
                u"file": "47127629.mp3",
                u"md5": "2f5471edc79ad3f33a683153e96a79c1",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"In the End"
                }
            },
            {
                u"file": "47127631.mp3",
                u"md5": "f9ba87aa940af7213f98949254f1c6e2",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm \r\n Filip / David / Povel / Magnus \r\n www.theroyalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"Knocked Up"
                }
            },
            {
                u"file": "75206121.mp3",
                u"md5": "f9d1fe9406717e302980c30de4af9353",
                u"info_dict": {
                    u"upload_date": u"20130116",
                    u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\n As a gift to our fans we would like to offer you a free download of the track! ",
                    u"uploader": u"The Royal Concept",
                    u"title": u"World On Fire"
                }
            }
        ]
    }

    def _real_extract(self, url):
        """Extract every track of the set at *url* as a playlist.

        Raises ExtractorError when *url* does not match ``_VALID_URL``.
        Returns None after reporting the errors when the API answer carries
        an 'errors' list; otherwise returns a dict with '_type' set to
        'playlist', the per-track 'entries', and the set's 'id' and 'title'.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # extract simple title (uploader + slug of song title)
        slug_title = mobj.group(2)
        full_title = '%s/sets/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        # Rebuild a canonical page URL and let the API resolve it.
        url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
        resolv_url = self._resolv_url(url)
        info_json = self._download_webpage(resolv_url, full_title)

        info = json.loads(info_json)
        if 'errors' in info:
            for err in info['errors']:
                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
            # Stop here: the entries below need info['tracks'].
            return

        self.report_extraction(full_title)
        return {'_type': 'playlist',
                'entries': [self._extract_info_dict(track) for track in info['tracks']],
                # NOTE(review): reconstructed entry (one line was missing
                # between 'entries' and 'title'); confirm the key exists.
                'id': info['id'],
                'title': info['title'],
                }