]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py 
 
 
 
 
 
 
 
 
import json
import re

from .common import InfoExtractor
# NOTE(review): the classes below also use ExtractorError, compat_str,
# compat_urlparse and unified_strdate; their import is not visible in this
# extract -- confirm it exists before relying on this module.
 
  14  class  SoundcloudIE ( InfoExtractor
):  
  15      """Information extractor for soundcloud.com  
  16         To access the media, the uid of the song and a stream token  
  17         must be extracted from the page source and the script must make  
  18         a request to media.soundcloud.com/crossdomain.xml. Then  
  19         the media can be grabbed by requesting from an url composed  
  20         of the stream token and uid  
  23      _VALID_URL 
=  r
'''^(?:https?://)?  
  24                      (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)  
  25                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))  
  26                         |(?P<widget>w.soundcloud.com/player/?.*?url=.*)  
  29      IE_NAME 
=  u
'soundcloud'  
  31          u
'url' :  u
'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,  
  32          u
'file' :  u
'62986583.mp3' ,  
  33          u
'md5' :  u
'ebef0a451b909710ed1d7787dddbf0d7' ,  
  35              u
"upload_date" :  u
"20121011" ,   
  36              u
"description" :  u
"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd" ,   
  37              u
"uploader" :  u
"E.T. ExTerrestrial Music" ,   
  38              u
"title" :  u
"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"  
  42      _CLIENT_ID 
=  'b45b1aa10f1ac2941910a7f0d10f8e28'  
  45      def  suitable ( cls
,  url
):  
  46          return  re
. match ( cls
._ VALID
_U RL
,  url
,  flags
= re
. VERBOSE
)  is not None  
  48      def  report_resolve ( self
,  video_id
):  
  49          """Report information extraction."""  
  50          self
. to_screen ( u
' %s : Resolving id'  %  video_id
)  
  53      def  _resolv_url ( cls
,  url
):  
  54          return  'http://api.soundcloud.com/resolve.json?url='  +  url 
+  '&client_id='  +  cls
._ CLIENT
_ ID
 
  56      def  _extract_info_dict ( self
,  info
,  full_title
= None ):  
  58          name 
=  full_title 
or  video_id
 
  59          self
. report_extraction ( name
)  
  61          thumbnail 
=  info
[ 'artwork_url' ]  
  62          if  thumbnail 
is not None :  
  63              thumbnail 
=  thumbnail
. replace ( '-large' ,  '-t500x500' )  
  66              'url' :       info
[ 'stream_url' ] +  '?client_id='  +  self
._ CLIENT
_ ID
,  
  67              'uploader' :  info
[ 'user' ][ 'username' ],  
  68              'upload_date' :  unified_strdate ( info
[ 'created_at' ]),  
  69              'title' :     info
[ 'title' ],  
  71              'description' :  info
[ 'description' ],  
  72              'thumbnail' :  thumbnail
,  
  75      def  _real_extract ( self
,  url
):  
  76          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  flags
= re
. VERBOSE
)  
  78              raise  ExtractorError ( u
'Invalid URL:  %s '  %  url
)  
  80          track_id 
=  mobj
. group ( 'track_id' )  
  81          if  track_id 
is not None :  
  82              info_json_url 
=  'http://api.soundcloud.com/tracks/'  +  track_id 
+  '.json?client_id='  +  self
._ CLIENT
_ ID
 
  84          elif  mobj
. group ( 'widget' ):  
  85              query 
=  compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)  
  86              return  self
. url_result ( query
[ 'url' ][ 0 ],  ie
= 'Soundcloud' )  
  88              # extract uploader (which is in the url)  
  89              uploader 
=  mobj
. group ( 1 )  
  90              # extract simple title (uploader + slug of song title)  
  91              slug_title 
=   mobj
. group ( 2 )  
  92              full_title 
=  ' %s / %s '  % ( uploader
,  slug_title
)  
  94              self
. report_resolve ( full_title
)  
  96              url 
=  'http://soundcloud.com/ %s / %s '  % ( uploader
,  slug_title
)  
  97              info_json_url 
=  self
._ resolv
_u rl
( url
)  
  98          info_json 
=  self
._ download
_ webpage
( info_json_url
,  full_title
,  u
'Downloading info JSON' )  
 100          info 
=  json
. loads ( info_json
)  
 101          return  self
._ extract
_ info
_ dict
( info
,  full_title
)  
 103  class  SoundcloudSetIE ( SoundcloudIE
):  
 104      _VALID_URL 
=  r
'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'  
 105      IE_NAME 
=  u
'soundcloud:set'  
 107          u
"url" : "https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep" ,  
 110                  u
"file" : "30510138.mp3" ,  
 111                  u
"md5" : "f9136bf103901728f29e419d2c70f55d" ,  
 113                      u
"upload_date" :  u
"20111213" ,  
 114                      u
"description" :  u
"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com" ,  
 115                      u
"uploader" :  u
"The Royal Concept" ,  
 116                      u
"title" :  u
"D-D-Dance"  
 120                  u
"file" : "47127625.mp3" ,  
 121                  u
"md5" : "09b6758a018470570f8fd423c9453dd8" ,  
 123                      u
"upload_date" :  u
"20120521" ,  
 124                      u
"description" :  u
"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com" ,  
 125                      u
"uploader" :  u
"The Royal Concept" ,  
 126                      u
"title" :  u
"The Royal Concept - Gimme Twice"  
 130                  u
"file" : "47127627.mp3" ,  
 131                  u
"md5" : "154abd4e418cea19c3b901f1e1306d9c" ,  
 133                      u
"upload_date" :  u
"20120521" ,  
 134                      u
"uploader" :  u
"The Royal Concept" ,  
 135                      u
"title" :  u
"Goldrushed"  
 139                  u
"file" : "47127629.mp3" ,  
 140                  u
"md5" : "2f5471edc79ad3f33a683153e96a79c1" ,  
 142                      u
"upload_date" :  u
"20120521" ,  
 143                      u
"description" :  u
"The Royal Concept from Stockholm \r\n Filip / Povel / David / Magnus \r\n www.royalconceptband.com" ,  
 144                      u
"uploader" :  u
"The Royal Concept" ,  
 145                      u
"title" :  u
"In the End"  
 149                  u
"file" : "47127631.mp3" ,  
 150                  u
"md5" : "f9ba87aa940af7213f98949254f1c6e2" ,  
 152                      u
"upload_date" :  u
"20120521" ,  
 153                      u
"description" :  u
"The Royal Concept from Stockholm \r\n Filip / David / Povel / Magnus \r\n www.theroyalconceptband.com" ,  
 154                      u
"uploader" :  u
"The Royal Concept" ,  
 155                      u
"title" :  u
"Knocked Up"  
 159                  u
"file" : "75206121.mp3" ,  
 160                  u
"md5" : "f9d1fe9406717e302980c30de4af9353" ,  
 162                      u
"upload_date" :  u
"20130116" ,  
 163                      u
"description" :  u
"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central).   \r\n As a gift to our fans we would like to offer you a free download of the track!  " ,  
 164                      u
"uploader" :  u
"The Royal Concept" ,  
 165                      u
"title" :  u
"World On Fire"  
 171      def  _real_extract ( self
,  url
):  
 172          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 174              raise  ExtractorError ( u
'Invalid URL:  %s '  %  url
)  
 176          # extract uploader (which is in the url)  
 177          uploader 
=  mobj
. group ( 1 )  
 178          # extract simple title (uploader + slug of song title)  
 179          slug_title 
=   mobj
. group ( 2 )  
 180          full_title 
=  ' %s /sets/ %s '  % ( uploader
,  slug_title
)  
 182          self
. report_resolve ( full_title
)  
 184          url 
=  'http://soundcloud.com/ %s /sets/ %s '  % ( uploader
,  slug_title
)  
 185          resolv_url 
=  self
._ resolv
_u rl
( url
)  
 186          info_json 
=  self
._ download
_ webpage
( resolv_url
,  full_title
)  
 189          info 
=  json
. loads ( info_json
)  
 191              for  err 
in  info
[ 'errors' ]:  
 192                  self
._ downloader
. report_error ( u
'unable to download video webpage:  %s '  %  compat_str ( err
[ 'error_message' ]))  
 195          self
. report_extraction ( full_title
)  
 196          return  { '_type' :  'playlist' ,  
 197                  'entries' : [ self
._ extract
_ info
_ dict
( track
)  for  track 
in  info
[ 'tracks' ]],  
 199                  'title' :  info
[ 'title' ],