]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py
4 from . common
import InfoExtractor
class SoundcloudIE(InfoExtractor):
    """Information extractor for single tracks on soundcloud.com.

    The track page URL is resolved through ``api.soundcloud.com/resolve.json``
    to obtain the track metadata; the stream definitions are then fetched
    from ``api.sndcdn.com`` and the ``http_mp3_128_url`` entry is used as
    the media URL.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)(?:[?].*)?$'
    IE_NAME = u'soundcloud'
    # NOTE(review): the ``_TEST`` name and the nested ``info_dict`` key were
    # lost in the listing this block was recovered from; they are restored
    # here on the assumption that the usual extractor test layout applies.
    # Confirm against the repository.
    _TEST = {
        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
        u'file': u'62986583.mp3',
        u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
        u'info_dict': {
            u"upload_date": u"20121011",
            u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
            u"uploader": u"E.T. ExTerrestrial Music",
            u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1",
        },
    }

    # API client id sent with every request; kept in one place instead of
    # being repeated inside each URL literal.
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    def report_resolve(self, video_id):
        """Report that the given id is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    def _real_extract(self, url):
        """Extract the media URL and metadata for one SoundCloud track.

        url -- track page URL; must match ``_VALID_URL``.

        Raises ExtractorError when the URL does not match ``_VALID_URL``.

        Returns a one-element list holding the info dictionary.
        NOTE(review): the ``return`` statement and the ``id``/``url``/``ext``
        entries were missing from the recovered listing and are restored
        here; confirm the expected return shape against InfoExtractor.
        """
        mobj = re.match(self._VALID_URL, url)
        # Only a non-matching URL is an error; without this guard every
        # call would raise.
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # extract simple title (uploader + slug of song title)
        slug_title = mobj.group(2)
        full_title = '%s/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        # Rebuild a canonical track URL (no query string, no stray
        # whitespace) before handing it to the resolve endpoint.
        url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
        resolv_url = ('http://api.soundcloud.com/resolve.json?url=' + url
                      + '&client_id=' + self._CLIENT_ID)
        info_json = self._download_webpage(
            resolv_url, full_title, u'Downloading info JSON')

        info = json.loads(info_json)
        # The numeric track id is needed to build the streams URL below.
        video_id = info['id']
        self.report_extraction(full_title)

        streams_url = ('https://api.sndcdn.com/i1/tracks/' + str(video_id)
                       + '/streams?client_id=' + self._CLIENT_ID)
        stream_json = self._download_webpage(
            streams_url, full_title,
            u'Downloading stream definitions',
            u'unable to download stream definitions')

        streams = json.loads(stream_json)
        mediaURL = streams['http_mp3_128_url']
        upload_date = unified_strdate(info['created_at'])

        return [{
            'id': video_id,
            'url': mediaURL,
            'uploader': info['user']['username'],
            'upload_date': upload_date,
            'title': info['title'],
            # presumably mp3, matching the 'http_mp3_128_url' stream and the
            # '.mp3' test file name -- confirm
            'ext': u'mp3',
            'description': info['description'],
        }]
80 class SoundcloudSetIE ( InfoExtractor
):
81 """Information extractor for soundcloud.com sets
82 To access the media, the uid of the song and a stream token
83 must be extracted from the page source and the script must make
84 a request to media.soundcloud.com/crossdomain.xml. Then
85 the media can be grabbed by requesting from an url composed
86 of the stream token and uid
89 _VALID_URL
= r
'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
90 IE_NAME
= u
'soundcloud:set'
92 u
"url" : "https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep" ,
95 u
"file" : "30510138.mp3" ,
96 u
"md5" : "f9136bf103901728f29e419d2c70f55d" ,
98 u
"upload_date" : u
"20111213" ,
99 u
"description" : u
"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com" ,
100 u
"uploader" : u
"The Royal Concept" ,
101 u
"title" : u
"D-D-Dance"
105 u
"file" : "47127625.mp3" ,
106 u
"md5" : "09b6758a018470570f8fd423c9453dd8" ,
108 u
"upload_date" : u
"20120521" ,
109 u
"description" : u
"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com" ,
110 u
"uploader" : u
"The Royal Concept" ,
111 u
"title" : u
"The Royal Concept - Gimme Twice"
115 u
"file" : "47127627.mp3" ,
116 u
"md5" : "154abd4e418cea19c3b901f1e1306d9c" ,
118 u
"upload_date" : u
"20120521" ,
119 u
"uploader" : u
"The Royal Concept" ,
120 u
"title" : u
"Goldrushed"
124 u
"file" : "47127629.mp3" ,
125 u
"md5" : "2f5471edc79ad3f33a683153e96a79c1" ,
127 u
"upload_date" : u
"20120521" ,
128 u
"description" : u
"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com" ,
129 u
"uploader" : u
"The Royal Concept" ,
130 u
"title" : u
"In the End"
134 u
"file" : "47127631.mp3" ,
135 u
"md5" : "f9ba87aa940af7213f98949254f1c6e2" ,
137 u
"upload_date" : u
"20120521" ,
138 u
"description" : u
"The Royal Concept from Stockholm \r\n Filip / David / Povel / Magnus \r\n www.theroyalconceptband.com" ,
139 u
"uploader" : u
"The Royal Concept" ,
140 u
"title" : u
"Knocked Up"
144 u
"file" : "75206121.mp3" ,
145 u
"md5" : "f9d1fe9406717e302980c30de4af9353" ,
147 u
"upload_date" : u
"20130116" ,
148 u
"description" : u
"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\n As a gift to our fans we would like to offer you a free download of the track! " ,
149 u
"uploader" : u
"The Royal Concept" ,
150 u
"title" : u
"World On Fire"
156 def report_resolve ( self
, video_id
):
157 """Report information extraction."""
158 self
. to_screen ( u
' %s : Resolving id' % video_id
)
160 def _real_extract ( self
, url
):
161 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
163 raise ExtractorError ( u
'Invalid URL: %s ' % url
)
165 # extract uploader (which is in the url)
166 uploader
= mobj
. group ( 1 )
167 # extract simple title (uploader + slug of song title)
168 slug_title
= mobj
. group ( 2 )
169 full_title
= ' %s /sets/ %s ' % ( uploader
, slug_title
)
171 self
. report_resolve ( full_title
)
173 url
= 'http://soundcloud.com/ %s /sets/ %s ' % ( uploader
, slug_title
)
174 resolv_url
= 'http://api.soundcloud.com/resolve.json?url=' + url
+ '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
175 info_json
= self
._ download
_ webpage
( resolv_url
, full_title
)
178 info
= json
. loads ( info_json
)
180 for err
in info
[ 'errors' ]:
181 self
._ downloader
. report_error ( u
'unable to download video webpage: %s ' % compat_str ( err
[ 'error_message' ]))
184 self
. report_extraction ( full_title
)
185 for track
in info
[ 'tracks' ]:
186 video_id
= track
[ 'id' ]
188 streams_url
= 'https://api.sndcdn.com/i1/tracks/' + str ( video_id
) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
189 stream_json
= self
._ download
_ webpage
( streams_url
, video_id
, u
'Downloading track info JSON' )
191 self
. report_extraction ( video_id
)
192 streams
= json
. loads ( stream_json
)
193 mediaURL
= streams
[ 'http_mp3_128_url' ]
198 'uploader' : track
[ 'user' ][ 'username' ],
199 'upload_date' : unified_strdate ( track
[ 'created_at' ]),
200 'title' : track
[ 'title' ],
202 'description' : track
[ 'description' ],