]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py 
54256e1a29f02ad75dfdf828e7b4819455294a14
   2  from  __future__ 
import  unicode_literals
   7  from  . common 
import  InfoExtractor
  19  class  SoundcloudIE ( InfoExtractor
):   20      """Information extractor for soundcloud.com   21         To access the media, the uid of the song and a stream token   22         must be extracted from the page source and the script must make   23         a request to media.soundcloud.com/crossdomain.xml. Then   24         the media can be grabbed by requesting from an url composed   25         of the stream token and uid   28      _VALID_URL 
=  r
'''(?x)^(?:https?://)?   29                      (?:(?:(?:www\.|m\.)?soundcloud\.com/   30                              (?P<uploader>[\w\d-]+)/   31                              (?!sets/|likes/?(?:$|[?#]))   33                              (?P<token>[^?]+?)?(?:[?].*)?$)   34                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)   35                            (?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)   36                         |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)   39      IE_NAME 
=  'soundcloud'   42              'url' :  'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,   43              'md5' :  'ebef0a451b909710ed1d7787dddbf0d7' ,   47                  'upload_date' :  '20121011' ,   48                  'description' :  'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o \' d' ,   49                  'uploader' :  'E.T. ExTerrestrial Music' ,   50                  'title' :  'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1' ,   56              'url' :  'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,   60                  'title' :  'Goldrushed' ,   61                  'description' :  'From Stockholm Sweden \r\n Povel / Magnus / Filip / David \r\n www.theroyalconcept.com' ,   62                  'uploader' :  'The Royal Concept' ,   63                  'upload_date' :  '20120521' ,   68                  'skip_download' :  True ,   73              'url' :  'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp' ,   74              'md5' :  'aa0dd32bfea9b0c5ef4f02aacd080604' ,   78                  'title' :  'Youtube - Dl Test Video  \'\'  Ä↭' ,   79                  'uploader' :  'jaimeMF' ,   80                  'description' :  'test chars:   \"\' / \\ ä↭' ,   81                  'upload_date' :  '20131209' ,   85          # private link (alt format)   87              'url' :  'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp' ,   88              'md5' :  'aa0dd32bfea9b0c5ef4f02aacd080604' ,   92                  'title' :  'Youtube - Dl Test Video  \'\'  Ä↭' ,   93                  'uploader' :  'jaimeMF' ,   94                  'description' :  'test chars:   \"\' / \\ ä↭' ,   95                  'upload_date' :  '20131209' ,  101              'url' :  'https://soundcloud.com/oddsamples/bus-brakes' ,  102              'md5' :  '7624f2351f8a3b2e7cd51522496e7631' ,  106                  'title' :  'Bus Brakes' ,  107                  'description' :  'md5:0053ca6396e8d2fd7b7e1595ef12ab66' ,  108                  'uploader' :  'oddsamples' ,  109                  'upload_date' :  '20140109' ,  115      _CLIENT_ID 
=  'b45b1aa10f1ac2941910a7f0d10f8e28'  116      _IPHONE_CLIENT_ID 
=  '376f225bf427445fc4bfb6b99b72e0bf'  118      def  report_resolve ( self
,  video_id
):  119          """Report information extraction."""  120          self
. to_screen ( ' %s : Resolving id'  %  video_id
)  123      def  _resolv_url ( cls
,  url
):  124          return  'http://api.soundcloud.com/resolve.json?url='  +  url 
+  '&client_id='  +  cls
._ CLIENT
_ ID
 126      def  _extract_info_dict ( self
,  info
,  full_title
= None ,  quiet
= False ,  secret_token
= None ):  127          track_id 
=  compat_str ( info
[ 'id' ])  128          name 
=  full_title 
or  track_id
 130              self
. report_extraction ( name
)  132          thumbnail 
=  info
[ 'artwork_url' ]  133          if  thumbnail 
is not None :  134              thumbnail 
=  thumbnail
. replace ( '-large' ,  '-t500x500' )  138              'uploader' :  info
[ 'user' ][ 'username' ],  139              'upload_date' :  unified_strdate ( info
[ 'created_at' ]),  140              'title' :  info
[ 'title' ],  141              'description' :  info
[ 'description' ],  142              'thumbnail' :  thumbnail
,  143              'duration' :  int_or_none ( info
. get ( 'duration' ),  1000 ),  144              'webpage_url' :  info
. get ( 'permalink_url' ),  147          if  info
. get ( 'downloadable' ,  False ):  148              # We can build a direct link to the song  150                  'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (  151                      track_id
,  self
._ CLIENT
_ ID
))  153                  'format_id' :  'download' ,  154                  'ext' :  info
. get ( 'original_format' ,  'mp3' ),  160          # We have to retrieve the url  161          streams_url 
= ( 'http://api.soundcloud.com/i1/tracks/ {0} /streams?'  162              'client_id= {1} &secret_token= {2} ' . format ( track_id
,  self
._ IPHONE
_ CLIENT
_ ID
,  secret_token
))  163          format_dict 
=  self
._ download
_ json
(  165              track_id
,  'Downloading track url' )  167          for  key
,  stream_url 
in  format_dict
. items ():  168              if  key
. startswith ( 'http' ):  175              elif  key
. startswith ( 'rtmp' ):  176                  # The url doesn't have an rtmp app, we have to extract the playpath  177                  url
,  path 
=  stream_url
. split ( 'mp3:' ,  1 )  181                      'play_path' :  'mp3:'  +  path
,  187                  # We fallback to the stream_url in the original info, this  188                  # cannot be always used, sometimes it can give an HTTP 404 error  190                      'format_id' :  'fallback' ,  191                      'url' :  info
[ 'stream_url' ] +  '?client_id='  +  self
._ CLIENT
_ ID
,  197                  if  f
[ 'format_id' ]. startswith ( 'http' ):  198                      f
[ 'protocol' ] =  'http'  199                  if  f
[ 'format_id' ]. startswith ( 'rtmp' ):  200                      f
[ 'protocol' ] =  'rtmp'  202              self
._ sort
_ formats
( formats
)  203              result
[ 'formats' ] =  formats
 207      def  _real_extract ( self
,  url
):  208          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  flags
= re
. VERBOSE
)  210              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  212          track_id 
=  mobj
. group ( 'track_id' )  214          if  track_id 
is not None :  215              info_json_url 
=  'http://api.soundcloud.com/tracks/'  +  track_id 
+  '.json?client_id='  +  self
._ CLIENT
_ ID
 216              full_title 
=  track_id
 217              token 
=  mobj
. group ( 'secret_token' )  219                  info_json_url 
+=  "&secret_token="  +  token
 220          elif  mobj
. group ( 'player' ):  221              query 
=  compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)  222              return  self
. url_result ( query
[ 'url' ][ 0 ])  224              # extract uploader (which is in the url)  225              uploader 
=  mobj
. group ( 'uploader' )  226              # extract simple title (uploader + slug of song title)  227              slug_title 
=   mobj
. group ( 'title' )  228              token 
=  mobj
. group ( 'token' )  229              full_title 
=  resolve_title 
=  ' %s / %s '  % ( uploader
,  slug_title
)  231                  resolve_title 
+=  '/ %s '  %  token
 233              self
. report_resolve ( full_title
)  235              url 
=  'http://soundcloud.com/ %s '  %  resolve_title
 236              info_json_url 
=  self
._ resolv
_u rl
( url
)  237          info 
=  self
._ download
_ json
( info_json_url
,  full_title
,  'Downloading info JSON' )  239          return  self
._ extract
_ info
_ dict
( info
,  full_title
,  secret_token
= token
)  242  class  SoundcloudSetIE ( SoundcloudIE
):  243      _VALID_URL 
=  r
'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'  244      IE_NAME 
=  'soundcloud:set'  246          'url' :  'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep' ,  248              'title' :  'The Royal Concept EP' ,  250          'playlist_mincount' :  6 ,  253      def  _real_extract ( self
,  url
):  254          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  256          # extract uploader (which is in the url)  257          uploader 
=  mobj
. group ( 'uploader' )  258          # extract simple title (uploader + slug of song title)  259          slug_title 
=  mobj
. group ( 'slug_title' )  260          full_title 
=  ' %s /sets/ %s '  % ( uploader
,  slug_title
)  261          url 
=  'http://soundcloud.com/ %s /sets/ %s '  % ( uploader
,  slug_title
)  263          token 
=  mobj
. group ( 'token' )  265              full_title 
+=  '/'  +  token
 268          self
. report_resolve ( full_title
)  270          resolv_url 
=  self
._ resolv
_u rl
( url
)  271          info 
=  self
._ download
_ json
( resolv_url
,  full_title
)  274              for  err 
in  info
[ 'errors' ]:  275                  self
._ downloader
. report_error ( 'unable to download video webpage:  %s '  %  compat_str ( err
[ 'error_message' ]))  280              'entries' : [ self
._ extract
_ info
_ dict
( track
,  secret_token
= token
)  for  track 
in  info
[ 'tracks' ]],  282              'title' :  info
[ 'title' ],  286  class  SoundcloudUserIE ( SoundcloudIE
):  287      _VALID_URL 
=  r
'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'  288      IE_NAME 
=  'soundcloud:user'  290          'url' :  'https://soundcloud.com/the-concept-band' ,  293              'title' :  'The Royal Concept' ,  295          'playlist_mincount' :  12  297          'url' :  'https://soundcloud.com/the-concept-band/likes' ,  300              'title' :  'The Royal Concept' ,  302          'playlist_mincount' :  1 ,  305      def  _real_extract ( self
,  url
):  306          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  307          uploader 
=  mobj
. group ( 'user' )  308          resource 
=  mobj
. group ( 'rsrc' )  311          elif  resource 
==  'likes' :  312              resource 
=  'favorites'  314          url 
=  'http://soundcloud.com/ %s /'  %  uploader
 315          resolv_url 
=  self
._ resolv
_u rl
( url
)  316          user 
=  self
._ download
_ json
(  317              resolv_url
,  uploader
,  'Downloading user info' )  318          base_url 
=  'http://api.soundcloud.com/users/ %s / %s .json?'  % ( uploader
,  resource
)  321          for  i 
in  itertools
. count ():  322              data 
=  compat_urllib_parse
. urlencode ({  325                  'client_id' :  self
._ CLIENT
_ ID
,  327              new_entries 
=  self
._ download
_ json
(  328                  base_url 
+  data
,  uploader
,  'Downloading track page  %s '  % ( i 
+  1 ))  329              if  len ( new_entries
) ==  0 :  330                  self
. to_screen ( ' %s : End page received'  %  uploader
)  332              entries
. extend ( self
._ extract
_ info
_ dict
( e
,  quiet
= True )  for  e 
in  new_entries
)  336              'id' :  compat_str ( user
[ 'id' ]),  337              'title' :  user
[ 'username' ],  342  class  SoundcloudPlaylistIE ( SoundcloudIE
):  343      _VALID_URL 
=  r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'  344      IE_NAME 
=  'soundcloud:playlist'  346          'url' :  'http://api.soundcloud.com/playlists/4110309' ,  349              'title' :  'TILT Brass - Bowery Poetry Club, August  \' 03 [Non-Site SCR 02]' ,  350              'description' :  're:.*?TILT Brass - Bowery Poetry Club' ,  355      def  _real_extract ( self
,  url
):  356          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  357          playlist_id 
=  mobj
. group ( 'id' )  358          base_url 
=  ' %s //api.soundcloud.com/playlists/ %s .json?'  % ( self
. http_scheme (),  playlist_id
)  361              'client_id' :  self
._ CLIENT
_ ID
,  363          token 
=  mobj
. group ( 'token' )  366              data_dict
[ 'secret_token' ] =  token
 368          data 
=  compat_urllib_parse
. urlencode ( data_dict
)  369          data 
=  self
._ download
_ json
(  370              base_url 
+  data
,  playlist_id
,  'Downloading playlist' )  373              self
._ extract
_ info
_ dict
( t
,  quiet
= True ,  secret_token
= token
)  374                  for  t 
in  data
[ 'tracks' ]]  379              'title' :  data
. get ( 'title' ),  380              'description' :  data
. get ( 'description' ),