]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py 
097d0e418d452a968cdf0355419b02c4dd392081
   2  from  __future__ 
import  unicode_literals
   7  from  . common 
import  InfoExtractor
  19  class  SoundcloudIE ( InfoExtractor
):   20      """Information extractor for soundcloud.com   21         To access the media, the uid of the song and a stream token   22         must be extracted from the page source and the script must make   23         a request to media.soundcloud.com/crossdomain.xml. Then   24         the media can be grabbed by requesting from an url composed   25         of the stream token and uid   28      _VALID_URL 
=  r
'''(?x)^(?:https?://)?   29                      (?:(?:(?:www\.|m\.)?soundcloud\.com/   30                              (?P<uploader>[\w\d-]+)/   31                              (?!sets/)(?P<title>[\w\d-]+)/?   32                              (?P<token>[^?]+?)?(?:[?].*)?$)   33                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))   34                         |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)   37      IE_NAME 
=  'soundcloud'   40              'url' :  'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,   41              'file' :  '62986583.mp3' ,   42              'md5' :  'ebef0a451b909710ed1d7787dddbf0d7' ,   44                  "upload_date" :  "20121011" ,   45                  "description" :  "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd" ,   46                  "uploader" :  "E.T. ExTerrestrial Music" ,   47                  "title" :  "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" ,   53              'url' :  'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,   57                  'title' :  'Goldrushed' ,   58                  'description' :  'From Stockholm Sweden \r\n Povel / Magnus / Filip / David \r\n www.theroyalconcept.com' ,   59                  'uploader' :  'The Royal Concept' ,   60                  'upload_date' :  '20120521' ,   65                  'skip_download' :  True ,   70              'url' :  'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp' ,   71              'md5' :  'aa0dd32bfea9b0c5ef4f02aacd080604' ,   75                  'title' :  'Youtube - Dl Test Video  \'\'  Ä↭' ,   76                  'uploader' :  'jaimeMF' ,   77                  'description' :  'test chars:   \"\' / \\ ä↭' ,   78                  'upload_date' :  '20131209' ,   84              'url' :  'https://soundcloud.com/oddsamples/bus-brakes' ,   85              'md5' :  '7624f2351f8a3b2e7cd51522496e7631' ,   89                  'title' :  'Bus Brakes' ,   90                  'description' :  'md5:0170be75dd395c96025d210d261c784e' ,   91                  'uploader' :  'oddsamples' ,   92                  'upload_date' :  '20140109' ,   98      _CLIENT_ID 
=  'b45b1aa10f1ac2941910a7f0d10f8e28'   99      _IPHONE_CLIENT_ID 
=  '376f225bf427445fc4bfb6b99b72e0bf'  101      def  report_resolve ( self
,  video_id
):  102          """Report information extraction."""  103          self
. to_screen ( ' %s : Resolving id'  %  video_id
)  106      def  _resolv_url ( cls
,  url
):  107          return  'http://api.soundcloud.com/resolve.json?url='  +  url 
+  '&client_id='  +  cls
._ CLIENT
_ ID
 109      def  _extract_info_dict ( self
,  info
,  full_title
= None ,  quiet
= False ,  secret_token
= None ):  110          track_id 
=  compat_str ( info
[ 'id' ])  111          name 
=  full_title 
or  track_id
 113              self
. report_extraction ( name
)  115          thumbnail 
=  info
[ 'artwork_url' ]  116          if  thumbnail 
is not None :  117              thumbnail 
=  thumbnail
. replace ( '-large' ,  '-t500x500' )  121              'uploader' :  info
[ 'user' ][ 'username' ],  122              'upload_date' :  unified_strdate ( info
[ 'created_at' ]),  123              'title' :  info
[ 'title' ],  124              'description' :  info
[ 'description' ],  125              'thumbnail' :  thumbnail
,  126              'duration' :  int_or_none ( info
. get ( 'duration' ),  1000 ),  129          if  info
. get ( 'downloadable' ,  False ):  130              # We can build a direct link to the song  132                  'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (  133                      track_id
,  self
._ CLIENT
_ ID
))  135                  'format_id' :  'download' ,  136                  'ext' :  info
. get ( 'original_format' ,  'mp3' ),  142          # We have to retrieve the url  143          streams_url 
= ( 'http://api.soundcloud.com/i1/tracks/ {0} /streams?'  144              'client_id= {1} &secret_token= {2} ' . format ( track_id
,  self
._ IPHONE
_ CLIENT
_ ID
,  secret_token
))  145          format_dict 
=  self
._ download
_ json
(  147              track_id
,  'Downloading track url' )  149          for  key
,  stream_url 
in  format_dict
. items ():  150              if  key
. startswith ( 'http' ):  157              elif  key
. startswith ( 'rtmp' ):  158                  # The url doesn't have an rtmp app, we have to extract the playpath  159                  url
,  path 
=  stream_url
. split ( 'mp3:' ,  1 )  163                      'play_path' :  'mp3:'  +  path
,  169                  # We fallback to the stream_url in the original info, this  170                  # cannot be always used, sometimes it can give an HTTP 404 error  172                      'format_id' :  'fallback' ,  173                      'url' :  info
[ 'stream_url' ] +  '?client_id='  +  self
._ CLIENT
_ ID
,  179                  if  f
[ 'format_id' ]. startswith ( 'http' ):  180                      f
[ 'protocol' ] =  'http'  181                  if  f
[ 'format_id' ]. startswith ( 'rtmp' ):  182                      f
[ 'protocol' ] =  'rtmp'  184              self
._ sort
_ formats
( formats
)  185              result
[ 'formats' ] =  formats
 189      def  _real_extract ( self
,  url
):  190          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  flags
= re
. VERBOSE
)  192              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  194          track_id 
=  mobj
. group ( 'track_id' )  196          if  track_id 
is not None :  197              info_json_url 
=  'http://api.soundcloud.com/tracks/'  +  track_id 
+  '.json?client_id='  +  self
._ CLIENT
_ ID
 198              full_title 
=  track_id
 199          elif  mobj
. group ( 'player' ):  200              query 
=  compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)  201              return  self
. url_result ( query
[ 'url' ][ 0 ])  203              # extract uploader (which is in the url)  204              uploader 
=  mobj
. group ( 'uploader' )  205              # extract simple title (uploader + slug of song title)  206              slug_title 
=   mobj
. group ( 'title' )  207              token 
=  mobj
. group ( 'token' )  208              full_title 
=  resolve_title 
=  ' %s / %s '  % ( uploader
,  slug_title
)  210                  resolve_title 
+=  '/ %s '  %  token
 212              self
. report_resolve ( full_title
)  214              url 
=  'http://soundcloud.com/ %s '  %  resolve_title
 215              info_json_url 
=  self
._ resolv
_u rl
( url
)  216          info 
=  self
._ download
_ json
( info_json_url
,  full_title
,  'Downloading info JSON' )  218          return  self
._ extract
_ info
_ dict
( info
,  full_title
,  secret_token
= token
)  221  class  SoundcloudSetIE ( SoundcloudIE
):  222      _VALID_URL 
=  r
'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'  223      IE_NAME 
=  'soundcloud:set'  224      # it's in tests/test_playlists.py  227      def  _real_extract ( self
,  url
):  228          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  230              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  232          # extract uploader (which is in the url)  233          uploader 
=  mobj
. group ( 1 )  234          # extract simple title (uploader + slug of song title)  235          slug_title 
=  mobj
. group ( 2 )  236          full_title 
=  ' %s /sets/ %s '  % ( uploader
,  slug_title
)  238          self
. report_resolve ( full_title
)  240          url 
=  'http://soundcloud.com/ %s /sets/ %s '  % ( uploader
,  slug_title
)  241          resolv_url 
=  self
._ resolv
_u rl
( url
)  242          info 
=  self
._ download
_ json
( resolv_url
,  full_title
)  245              for  err 
in  info
[ 'errors' ]:  246                  self
._ downloader
. report_error ( 'unable to download video webpage:  %s '  %  compat_str ( err
[ 'error_message' ]))  249          self
. report_extraction ( full_title
)  250          return  { '_type' :  'playlist' ,  251                  'entries' : [ self
._ extract
_ info
_ dict
( track
)  for  track 
in  info
[ 'tracks' ]],  253                  'title' :  info
[ 'title' ],  257  class  SoundcloudUserIE ( SoundcloudIE
):  258      _VALID_URL 
=  r
'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'  259      IE_NAME 
=  'soundcloud:user'  261      # it's in tests/test_playlists.py  264      def  _real_extract ( self
,  url
):  265          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  266          uploader 
=  mobj
. group ( 'user' )  267          resource 
=  mobj
. group ( 'rsrc' )  270          elif  resource 
==  'likes' :  271              resource 
=  'favorites'  273          url 
=  'http://soundcloud.com/ %s /'  %  uploader
 274          resolv_url 
=  self
._ resolv
_u rl
( url
)  275          user 
=  self
._ download
_ json
(  276              resolv_url
,  uploader
,  'Downloading user info' )  277          base_url 
=  'http://api.soundcloud.com/users/ %s / %s .json?'  % ( uploader
,  resource
)  280          for  i 
in  itertools
. count ():  281              data 
=  compat_urllib_parse
. urlencode ({  284                  'client_id' :  self
._ CLIENT
_ ID
,  286              new_entries 
=  self
._ download
_ json
(  287                  base_url 
+  data
,  uploader
,  'Downloading track page  %s '  % ( i 
+  1 ))  288              if  len ( new_entries
) ==  0 :  289                  self
. to_screen ( ' %s : End page received'  %  uploader
)  291              entries
. extend ( self
._ extract
_ info
_ dict
( e
,  quiet
= True )  for  e 
in  new_entries
)  295              'id' :  compat_str ( user
[ 'id' ]),  296              'title' :  user
[ 'username' ],  301  class  SoundcloudPlaylistIE ( SoundcloudIE
):  302      _VALID_URL 
=  r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'  303      IE_NAME 
=  'soundcloud:playlist'  305       # it's in tests/test_playlists.py  308      def  _real_extract ( self
,  url
):  309          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  310          playlist_id 
=  mobj
. group ( 'id' )  311          base_url 
=  ' %s //api.soundcloud.com/playlists/ %s .json?'  % ( self
. http_scheme (),  playlist_id
)  313          data 
=  compat_urllib_parse
. urlencode ({  314              'client_id' :  self
._ CLIENT
_ ID
,  316          data 
=  self
._ download
_ json
(  317              base_url 
+  data
,  playlist_id
,  'Downloading playlist' )  320              self
._ extract
_ info
_ dict
( t
,  quiet
= True )  for  t 
in  data
[ 'tracks' ]]  325              'title' :  data
. get ( 'title' ),  326              'description' :  data
. get ( 'description' ),