# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py
import json
import re

from .common import InfoExtractor
# NOTE(review): ExtractorError, compat_str, compat_urlparse and
# unified_strdate are used below, but the import that provides them is not
# visible in this extract -- restore it from the project's utility module.
class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com

    Three URL shapes are accepted (see ``_VALID_URL``):

    * ``soundcloud.com/<uploader>/<slug>`` -- resolved to track metadata
      through the ``resolve.json`` API endpoint;
    * ``api.soundcloud.com/tracks/<track_id>`` -- the metadata JSON is
      requested directly for that track id;
    * ``w.soundcloud.com/player/?...url=...`` -- the embedded ``url`` query
      parameter is handed back to this extractor via ``url_result``.

    The media URL is the ``stream_url`` field of the metadata JSON with
    the client id appended as a query parameter.
    """

    # Matched with re.VERBOSE, so the whitespace and line breaks inside the
    # pattern are ignored.  Groups 1 and 2 are the uploader and the track
    # slug of a regular track page.
    _VALID_URL = r'''^(?:https?://)?
                    (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
                       |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
                    )
                    '''
    IE_NAME = u'soundcloud'
    # NOTE(review): the assignment target and the nested `info_dict` key were
    # not visible in the extract this block was rebuilt from; they are
    # reconstructed from the gaps in the original line numbering -- confirm
    # against the project's test harness.
    _TEST = {
        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
        u'file': u'62986583.mp3',
        u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
        u'info_dict': {
            u"upload_date": u"20121011",
            u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
            u"uploader": u"E.T. ExTerrestrial Music",
            u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
        }
    }

    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    @classmethod
    def suitable(cls, url):
        """Return True if *url* matches this extractor's ``_VALID_URL``."""
        # The pattern is written in verbose form, so it must be matched
        # with re.VERBOSE.
        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None

    def report_resolve(self, video_id):
        """Report that *video_id* is being resolved."""
        self.to_screen(u'%s: Resolving id' % video_id)

    @classmethod
    def _resolv_url(cls, url):
        """Return the ``resolve.json`` API URL for a soundcloud page *url*."""
        # NOTE(review): `url` is appended without percent-encoding.
        return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID

    def _extract_info_dict(self, info, full_title=None):
        """Build the result dictionary for one track.

        *info* is the decoded metadata JSON of a track; *full_title*, when
        given, is the name used in the progress message instead of the
        track id.
        """
        # NOTE(review): 'id' and 'ext' are reconstructed (the `file` entries
        # of the tests are "<numeric id>.mp3") -- confirm against the
        # upstream file.
        video_id = info['id']
        name = full_title or video_id
        self.report_extraction(name)

        # The artwork may be absent or null; when present, request the
        # 500x500 rendition instead of the default "large" one.
        thumbnail = info.get('artwork_url')
        if thumbnail is not None:
            thumbnail = thumbnail.replace('-large', '-t500x500')

        return {
            'id': video_id,
            'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
            'uploader': info['user']['username'],
            'upload_date': unified_strdate(info['created_at']),
            'title': info['title'],
            'ext': u'mp3',
            'description': info.get('description'),
            'thumbnail': thumbnail,
        }

    def _real_extract(self, url):
        """Extract the track that *url* points to.

        Raises ExtractorError when *url* does not match ``_VALID_URL`` or
        when a widget URL carries no ``url`` query parameter.
        """
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        track_id = mobj.group('track_id')
        if track_id is not None:
            # API URL: the track id is known, so no resolve step is needed.
            info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
            full_title = track_id
        elif mobj.group('widget'):
            # Player widget: delegate to the track URL embedded in the query.
            query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            embedded = query.get('url')
            if not embedded:
                raise ExtractorError(u'Invalid URL: %s' % url)
            return self.url_result(embedded[0], ie='Soundcloud')
        else:
            # extract uploader (which is in the url)
            uploader = mobj.group(1)
            # extract simple title (uploader + slug of song title)
            slug_title = mobj.group(2)
            full_title = '%s/%s' % (uploader, slug_title)

            self.report_resolve(full_title)

            # Rebuild a canonical page URL (no query string) to resolve.
            url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
            info_json_url = self._resolv_url(url)

        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')

        info = json.loads(info_json)
        return self._extract_info_dict(info, full_title)
class SoundcloudSetIE(SoundcloudIE):
    """Information extractor for soundcloud.com sets (playlists).

    Accepts ``soundcloud.com/<uploader>/sets/<slug>`` URLs, resolves them
    through the ``resolve.json`` API endpoint and returns a playlist result
    with one entry per item of the ``tracks`` list in the response.
    """

    # Groups 1 and 2 are the uploader and the set slug.
    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
    IE_NAME = u'soundcloud:set'
    # NOTE(review): the assignment target and the `playlist` / `info_dict`
    # wrapper keys were not visible in the extract this block was rebuilt
    # from; they are reconstructed from the gaps in the original line
    # numbering.  The spaces around "\r\n" in the descriptions may also be
    # an extraction artifact -- confirm against the upstream file.
    _TEST = {
        u"url": "https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
        u"playlist": [
            {
                u"file": "30510138.mp3",
                u"md5": "f9136bf103901728f29e419d2c70f55d",
                u"info_dict": {
                    u"upload_date": u"20111213",
                    u"description": u"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"D-D-Dance"
                }
            },
            {
                u"file": "47127625.mp3",
                u"md5": "09b6758a018470570f8fd423c9453dd8",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"The Royal Concept - Gimme Twice"
                }
            },
            {
                u"file": "47127627.mp3",
                u"md5": "154abd4e418cea19c3b901f1e1306d9c",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"uploader": u"The Royal Concept",
                    u"title": u"Goldrushed"
                }
            },
            {
                u"file": "47127629.mp3",
                u"md5": "2f5471edc79ad3f33a683153e96a79c1",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"In the End"
                }
            },
            {
                u"file": "47127631.mp3",
                u"md5": "f9ba87aa940af7213f98949254f1c6e2",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm \r\n Filip / David / Povel / Magnus \r\n www.theroyalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"Knocked Up"
                }
            },
            {
                u"file": "75206121.mp3",
                u"md5": "f9d1fe9406717e302980c30de4af9353",
                u"info_dict": {
                    u"upload_date": u"20130116",
                    u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\n As a gift to our fans we would like to offer you a free download of the track! ",
                    u"uploader": u"The Royal Concept",
                    u"title": u"World On Fire"
                }
            }
        ]
    }

    def _real_extract(self, url):
        """Extract every track of the set that *url* points to.

        Returns a playlist dictionary, or None after reporting the errors
        listed in the API response.  Raises ExtractorError when *url* does
        not match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # extract simple title (uploader + slug of song title)
        slug_title = mobj.group(2)
        full_title = '%s/sets/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        # Rebuild a canonical page URL (no query string) to resolve.
        url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
        resolv_url = self._resolv_url(url)
        info_json = self._download_webpage(resolv_url, full_title)

        info = json.loads(info_json)
        # 'errors' is only looked up when present, so a successful response
        # without that key does not raise KeyError.
        errors = info.get('errors')
        if errors:
            for err in errors:
                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
            # Nothing to extract from an error response.
            return

        self.report_extraction(full_title)
        return {
            '_type': 'playlist',
            'entries': [self._extract_info_dict(track) for track in info['tracks']],
            'title': info['title'],
        }