]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   7  from  . common 
import  InfoExtractor
 
  20  class  SoundcloudIE ( InfoExtractor
):  
  21      """Information extractor for soundcloud.com  
  22         To access the media, the uid of the song and a stream token  
  23         must be extracted from the page source and the script must make  
  24         a request to media.soundcloud.com/crossdomain.xml. Then  
  25         the media can be grabbed by requesting from an url composed  
  26         of the stream token and uid  
  29      _VALID_URL 
=  r
'''(?x)^(?:https?://)?  
  30                      (?:(?:(?:www\.|m\.)?soundcloud\.com/  
  31                              (?P<uploader>[\w\d-]+)/  
  32                              (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))  
  34                              (?P<token>[^?]+?)?(?:[?].*)?$)  
  35                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)  
  36                            (?:/?\?secret_token=(?P<secret_token>[^&]+))?)  
  37                         |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)  
  40      IE_NAME 
=  'soundcloud'  
  43              'url' :  'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,  
  44              'md5' :  'ebef0a451b909710ed1d7787dddbf0d7' ,  
  48                  'upload_date' :  '20121011' ,  
  49                  'description' :  'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o \' d' ,  
  50                  'uploader' :  'E.T. ExTerrestrial Music' ,  
  51                  'title' :  'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1' ,  
  57              'url' :  'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,  
  61                  'title' :  'Goldrushed' ,  
  62                  'description' :  'From Stockholm Sweden \r\n Povel / Magnus / Filip / David \r\n www.theroyalconcept.com' ,  
  63                  'uploader' :  'The Royal Concept' ,  
  64                  'upload_date' :  '20120521' ,  
  69                  'skip_download' :  True ,  
  74              'url' :  'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp' ,  
  75              'md5' :  'aa0dd32bfea9b0c5ef4f02aacd080604' ,  
  79                  'title' :  'Youtube - Dl Test Video  \'\'  Ä↭' ,  
  80                  'uploader' :  'jaimeMF' ,  
  81                  'description' :  'test chars:   \"\' / \\ ä↭' ,  
  82                  'upload_date' :  '20131209' ,  
  86          # private link (alt format)  
  88              'url' :  'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp' ,  
  89              'md5' :  'aa0dd32bfea9b0c5ef4f02aacd080604' ,  
  93                  'title' :  'Youtube - Dl Test Video  \'\'  Ä↭' ,  
  94                  'uploader' :  'jaimeMF' ,  
  95                  'description' :  'test chars:   \"\' / \\ ä↭' ,  
  96                  'upload_date' :  '20131209' ,  
 102              'url' :  'https://soundcloud.com/oddsamples/bus-brakes' ,  
 103              'md5' :  '7624f2351f8a3b2e7cd51522496e7631' ,  
 107                  'title' :  'Bus Brakes' ,  
 108                  'description' :  'md5:0053ca6396e8d2fd7b7e1595ef12ab66' ,  
 109                  'uploader' :  'oddsamples' ,  
 110                  'upload_date' :  '20140109' ,  
 116      _CLIENT_ID 
=  '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'  
 117      _IPHONE_CLIENT_ID 
=  '376f225bf427445fc4bfb6b99b72e0bf'  
 119      def  report_resolve ( self
,  video_id
):  
 120          """Report information extraction."""  
 121          self
. to_screen ( ' %s : Resolving id'  %  video_id
)  
 124      def  _resolv_url ( cls
,  url
):  
 125          return  'http://api.soundcloud.com/resolve.json?url='  +  url 
+  '&client_id='  +  cls
._ CLIENT
_ ID
 
 127      def  _extract_info_dict ( self
,  info
,  full_title
= None ,  quiet
= False ,  secret_token
= None ):  
 128          track_id 
=  compat_str ( info
[ 'id' ])  
 129          name 
=  full_title 
or  track_id
 
 131              self
. report_extraction ( name
)  
 133          thumbnail 
=  info
[ 'artwork_url' ]  
 134          if  thumbnail 
is not None :  
 135              thumbnail 
=  thumbnail
. replace ( '-large' ,  '-t500x500' )  
 139              'uploader' :  info
[ 'user' ][ 'username' ],  
 140              'upload_date' :  unified_strdate ( info
[ 'created_at' ]),  
 141              'title' :  info
[ 'title' ],  
 142              'description' :  info
[ 'description' ],  
 143              'thumbnail' :  thumbnail
,  
 144              'duration' :  int_or_none ( info
. get ( 'duration' ),  1000 ),  
 145              'webpage_url' :  info
. get ( 'permalink_url' ),  
 148          if  info
. get ( 'downloadable' ,  False ):  
 149              # We can build a direct link to the song  
 151                  'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (  
 152                      track_id
,  self
._ CLIENT
_ ID
))  
 154                  'format_id' :  'download' ,  
 155                  'ext' :  info
. get ( 'original_format' ,  'mp3' ),  
 161          # We have to retrieve the url  
 162          streams_url 
= ( 'http://api.soundcloud.com/i1/tracks/ {0} /streams?'  
 163                         'client_id= {1} &secret_token= {2} ' . format ( track_id
,  self
._ IPHONE
_ CLIENT
_ ID
,  secret_token
))  
 164          format_dict 
=  self
._ download
_ json
(  
 166              track_id
,  'Downloading track url' )  
 168          for  key
,  stream_url 
in  format_dict
. items ():  
 169              if  key
. startswith ( 'http' ):  
 176              elif  key
. startswith ( 'rtmp' ):  
 177                  # The url doesn't have an rtmp app, we have to extract the playpath  
 178                  url
,  path 
=  stream_url
. split ( 'mp3:' ,  1 )  
 182                      'play_path' :  'mp3:'  +  path
,  
 188                  # We fallback to the stream_url in the original info, this  
 189                  # cannot be always used, sometimes it can give an HTTP 404 error  
 191                      'format_id' :  'fallback' ,  
 192                      'url' :  info
[ 'stream_url' ] +  '?client_id='  +  self
._ CLIENT
_ ID
,  
 198                  if  f
[ 'format_id' ]. startswith ( 'http' ):  
 199                      f
[ 'protocol' ] =  'http'  
 200                  if  f
[ 'format_id' ]. startswith ( 'rtmp' ):  
 201                      f
[ 'protocol' ] =  'rtmp'  
 203          self
._ check
_ formats
( formats
,  track_id
)  
 204          self
._ sort
_ formats
( formats
)  
 205          result
[ 'formats' ] =  formats
 
 209      def  _real_extract ( self
,  url
):  
 210          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  flags
= re
. VERBOSE
)  
 212              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  
 214          track_id 
=  mobj
. group ( 'track_id' )  
 216          if  track_id 
is not None :  
 217              info_json_url 
=  'http://api.soundcloud.com/tracks/'  +  track_id 
+  '.json?client_id='  +  self
._ CLIENT
_ ID
 
 218              full_title 
=  track_id
 
 219              token 
=  mobj
. group ( 'secret_token' )  
 221                  info_json_url 
+=  "&secret_token="  +  token
 
 222          elif  mobj
. group ( 'player' ):  
 223              query 
=  compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)  
 224              real_url 
=  query
[ 'url' ][ 0 ]  
 225              # If the token is in the query of the original url we have to  
 227              if  'secret_token'  in  query
:  
 228                  real_url 
+=  '?secret_token='  +  query
[ 'secret_token' ][ 0 ]  
 229              return  self
. url_result ( real_url
)  
 231              # extract uploader (which is in the url)  
 232              uploader 
=  mobj
. group ( 'uploader' )  
 233              # extract simple title (uploader + slug of song title)  
 234              slug_title 
=  mobj
. group ( 'title' )  
 235              token 
=  mobj
. group ( 'token' )  
 236              full_title 
=  resolve_title 
=  ' %s / %s '  % ( uploader
,  slug_title
)  
 238                  resolve_title 
+=  '/ %s '  %  token
 
 240              self
. report_resolve ( full_title
)  
 242              url 
=  'http://soundcloud.com/ %s '  %  resolve_title
 
 243              info_json_url 
=  self
._ resolv
_u rl
( url
)  
 244          info 
=  self
._ download
_ json
( info_json_url
,  full_title
,  'Downloading info JSON' )  
 246          return  self
._ extract
_ info
_ dict
( info
,  full_title
,  secret_token
= token
)  
 249  class  SoundcloudSetIE ( SoundcloudIE
):  
 250      _VALID_URL 
=  r
'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'  
 251      IE_NAME 
=  'soundcloud:set'  
 253          'url' :  'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep' ,  
 256              'title' :  'The Royal Concept EP' ,  
 258          'playlist_mincount' :  6 ,  
 261      def  _real_extract ( self
,  url
):  
 262          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 264          # extract uploader (which is in the url)  
 265          uploader 
=  mobj
. group ( 'uploader' )  
 266          # extract simple title (uploader + slug of song title)  
 267          slug_title 
=  mobj
. group ( 'slug_title' )  
 268          full_title 
=  ' %s /sets/ %s '  % ( uploader
,  slug_title
)  
 269          url 
=  'http://soundcloud.com/ %s /sets/ %s '  % ( uploader
,  slug_title
)  
 271          token 
=  mobj
. group ( 'token' )  
 273              full_title 
+=  '/'  +  token
 
 276          self
. report_resolve ( full_title
)  
 278          resolv_url 
=  self
._ resolv
_u rl
( url
)  
 279          info 
=  self
._ download
_ json
( resolv_url
,  full_title
)  
 282              msgs 
= ( compat_str ( err
[ 'error_message' ])  for  err 
in  info
[ 'errors' ])  
 283              raise  ExtractorError ( 'unable to download video webpage:  %s '  %  ',' . join ( msgs
))  
 285          entries 
= [ self
. url_result ( track
[ 'permalink_url' ],  'Soundcloud' )  for  track 
in  info
[ 'tracks' ]]  
 290              'id' :  ' %s '  %  info
[ 'id' ],  
 291              'title' :  info
[ 'title' ],  
 295  class  SoundcloudUserIE ( SoundcloudIE
):  
 296      _VALID_URL 
=  r
'''(?x)  
 298                              (?:(?:www|m)\.)?soundcloud\.com/  
 301                                  (?P<rsrc>tracks|sets|reposts|likes|spotlight)  
 305      IE_NAME 
=  'soundcloud:user'  
 307          'url' :  'https://soundcloud.com/the-akashic-chronicler' ,  
 310              'title' :  'The Akashic Chronicler (All)' ,  
 312          'playlist_mincount' :  111 ,  
 314          'url' :  'https://soundcloud.com/the-akashic-chronicler/tracks' ,  
 317              'title' :  'The Akashic Chronicler (Tracks)' ,  
 319          'playlist_mincount' :  50 ,  
 321          'url' :  'https://soundcloud.com/the-akashic-chronicler/sets' ,  
 324              'title' :  'The Akashic Chronicler (Playlists)' ,  
 326          'playlist_mincount' :  3 ,  
 328          'url' :  'https://soundcloud.com/the-akashic-chronicler/reposts' ,  
 331              'title' :  'The Akashic Chronicler (Reposts)' ,  
 333          'playlist_mincount' :  7 ,  
 335          'url' :  'https://soundcloud.com/the-akashic-chronicler/likes' ,  
 338              'title' :  'The Akashic Chronicler (Likes)' ,  
 340          'playlist_mincount' :  321 ,  
 342          'url' :  'https://soundcloud.com/grynpyret/spotlight' ,  
 345              'title' :  'Grynpyret (Spotlight)' ,  
 347          'playlist_mincount' :  1 ,  
 350      _API_BASE 
=  'https://api.soundcloud.com'  
 351      _API_V2_BASE 
=  'https://api-v2.soundcloud.com'  
 354          'all' :  ' %s /profile/soundcloud:users: %%s '  %  _API_V2_BASE
,  
 355          'tracks' :  ' %s /users/ %%s /tracks'  %  _API_BASE
,  
 356          'sets' :  ' %s /users/ %%s /playlists'  %  _API_V2_BASE
,  
 357          'reposts' :  ' %s /profile/soundcloud:users: %%s /reposts'  %  _API_V2_BASE
,  
 358          'likes' :  ' %s /users/ %%s /likes'  %  _API_V2_BASE
,  
 359          'spotlight' :  ' %s /users/ %%s /spotlight'  %  _API_V2_BASE
,  
 366          'reposts' :  'Reposts' ,  
 368          'spotlight' :  'Spotlight' ,  
 371      def  _real_extract ( self
,  url
):  
 372          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 373          uploader 
=  mobj
. group ( 'user' )  
 375          url 
=  'http://soundcloud.com/ %s /'  %  uploader
 
 376          resolv_url 
=  self
._ resolv
_u rl
( url
)  
 377          user 
=  self
._ download
_ json
(  
 378              resolv_url
,  uploader
,  'Downloading user info' )  
 380          resource 
=  mobj
. group ( 'rsrc' )  or  'all'  
 381          base_url 
=  self
._ BASE
_U RL
_ MAP
[ resource
] %  user
[ 'id' ]  
 386          for  i 
in  itertools
. count ():  
 388                  data 
=  compat_urllib_parse
. urlencode ({  
 391                      'client_id' :  self
._ CLIENT
_ ID
,  
 392                      'linked_partitioning' :  '1' ,  
 393                      'representation' :  'speedy' ,  
 395                  next_href 
=  base_url 
+  '?'  +  data
 
 397              response 
=  self
._ download
_ json
(  
 398                  next_href
,  uploader
,  'Downloading track page  %s '  % ( i 
+  1 ))  
 400              collection 
=  response
[ 'collection' ]  
 403                  self
. to_screen ( ' %s : End page received'  %  uploader
)  
 406              def  resolve_permalink_url ( candidates
):  
 407                  for  cand 
in  candidates
:  
 408                      if  isinstance ( cand
,  dict ):  
 409                          permalink_url 
=  cand
. get ( 'permalink_url' )  
 410                          if  permalink_url 
and  permalink_url
. startswith ( 'http' ):  
 414                  permalink_url 
=  resolve_permalink_url (( e
,  e
. get ( 'track' ),  e
. get ( 'playlist' )))  
 416                      entries
. append ( self
. url_result ( permalink_url
))  
 418              if  'next_href'  in  response
:  
 419                  next_href 
=  response
[ 'next_href' ]  
 427              'id' :  compat_str ( user
[ 'id' ]),  
 428              'title' :  ' %s  ( %s )'  % ( user
[ 'username' ],  self
._ TITLE
_ MAP
[ resource
]),  
 433  class  SoundcloudPlaylistIE ( SoundcloudIE
):  
 434      _VALID_URL 
=  r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'  
 435      IE_NAME 
=  'soundcloud:playlist'  
 437          'url' :  'http://api.soundcloud.com/playlists/4110309' ,  
 440              'title' :  'TILT Brass - Bowery Poetry Club, August  \' 03 [Non-Site SCR 02]' ,  
 441              'description' :  're:.*?TILT Brass - Bowery Poetry Club' ,  
 446      def  _real_extract ( self
,  url
):  
 447          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 448          playlist_id 
=  mobj
. group ( 'id' )  
 449          base_url 
=  ' %s //api.soundcloud.com/playlists/ %s .json?'  % ( self
. http_scheme (),  playlist_id
)  
 452              'client_id' :  self
._ CLIENT
_ ID
,  
 454          token 
=  mobj
. group ( 'token' )  
 457              data_dict
[ 'secret_token' ] =  token
 
 459          data 
=  compat_urllib_parse
. urlencode ( data_dict
)  
 460          data 
=  self
._ download
_ json
(  
 461              base_url 
+  data
,  playlist_id
,  'Downloading playlist' )  
 463          entries 
= [ self
. url_result ( track
[ 'permalink_url' ],  'Soundcloud' )  for  track 
in  data
[ 'tracks' ]]  
 468              'title' :  data
. get ( 'title' ),  
 469              'description' :  data
. get ( 'description' ),