]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py 
   2  from  __future__ 
import  unicode_literals
   8  from  . common 
import  InfoExtractor
  20  class  SoundcloudIE ( InfoExtractor
):   21      """Information extractor for soundcloud.com   22         To access the media, the uid of the song and a stream token   23         must be extracted from the page source and the script must make   24         a request to media.soundcloud.com/crossdomain.xml. Then   25         the media can be grabbed by requesting from an url composed   26         of the stream token and uid   29      _VALID_URL 
=  r
'''(?x)^(?:https?://)?   30                      (?:(?:(?:www\.|m\.)?soundcloud\.com/   31                              (?P<uploader>[\w\d-]+)/   32                              (?!sets/)(?P<title>[\w\d-]+)/?   33                              (?P<token>[^?]+?)?(?:[?].*)?$)   34                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))   35                         |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)   38      IE_NAME 
=  'soundcloud'   41              'url' :  'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,   42              'file' :  '62986583.mp3' ,   43              'md5' :  'ebef0a451b909710ed1d7787dddbf0d7' ,   45                  "upload_date" :  "20121011" ,   46                  "description" :  "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd" ,   47                  "uploader" :  "E.T. ExTerrestrial Music" ,   48                  "title" :  "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" ,   54              'url' :  'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,   58                  'title' :  'Goldrushed' ,   59                  'description' :  'From Stockholm Sweden \r\n Povel / Magnus / Filip / David \r\n www.theroyalconcept.com' ,   60                  'uploader' :  'The Royal Concept' ,   61                  'upload_date' :  '20120521' ,   66                  'skip_download' :  True ,   71              'url' :  'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp' ,   72              'md5' :  'aa0dd32bfea9b0c5ef4f02aacd080604' ,   76                  'title' :  'Youtube - Dl Test Video  \'\'  Ä↭' ,   77                  'uploader' :  'jaimeMF' ,   78                  'description' :  'test chars:   \"\' / \\ ä↭' ,   79                  'upload_date' :  '20131209' ,   85              'url' :  'https://soundcloud.com/simgretina/just-your-problem-baby-1' ,   86              'md5' :  '56a8b69568acaa967b4c49f9d1d52d19' ,   90                  'title' :  'Just Your Problem Baby (Acapella)' ,   91                  'description' :  'Vocals' ,   92                  'uploader' :  'Sim Gretina' ,   93                  'upload_date' :  '20130815' ,   99      _CLIENT_ID 
=  'b45b1aa10f1ac2941910a7f0d10f8e28'  100      _IPHONE_CLIENT_ID 
=  '376f225bf427445fc4bfb6b99b72e0bf'  102      def  report_resolve ( self
,  video_id
):  103          """Report information extraction."""  104          self
. to_screen ( ' %s : Resolving id'  %  video_id
)  107      def  _resolv_url ( cls
,  url
):  108          return  'http://api.soundcloud.com/resolve.json?url='  +  url 
+  '&client_id='  +  cls
._ CLIENT
_ ID
 110      def  _extract_info_dict ( self
,  info
,  full_title
= None ,  quiet
= False ,  secret_token
= None ):  111          track_id 
=  compat_str ( info
[ 'id' ])  112          name 
=  full_title 
or  track_id
 114              self
. report_extraction ( name
)  116          thumbnail 
=  info
[ 'artwork_url' ]  117          if  thumbnail 
is not None :  118              thumbnail 
=  thumbnail
. replace ( '-large' ,  '-t500x500' )  122              'uploader' :  info
[ 'user' ][ 'username' ],  123              'upload_date' :  unified_strdate ( info
[ 'created_at' ]),  124              'title' :  info
[ 'title' ],  125              'description' :  info
[ 'description' ],  126              'thumbnail' :  thumbnail
,  127              'duration' :  int_or_none ( info
. get ( 'duration' ),  1000 ),  130          if  info
. get ( 'downloadable' ,  False ):  131              # We can build a direct link to the song  133                  'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (  134                      track_id
,  self
._ CLIENT
_ ID
))  136                  'format_id' :  'download' ,  137                  'ext' :  info
. get ( 'original_format' ,  'mp3' ),  143          # We have to retrieve the url  144          streams_url 
= ( 'http://api.soundcloud.com/i1/tracks/ {0} /streams?'  145              'client_id= {1} &secret_token= {2} ' . format ( track_id
,  self
._ IPHONE
_ CLIENT
_ ID
,  secret_token
))  146          format_dict 
=  self
._ download
_ json
(  148              track_id
,  'Downloading track url' )  150          for  key
,  stream_url 
in  format_dict
. items ():  151              if  key
. startswith ( 'http' ):  158              elif  key
. startswith ( 'rtmp' ):  159                  # The url doesn't have an rtmp app, we have to extract the playpath  160                  url
,  path 
=  stream_url
. split ( 'mp3:' ,  1 )  164                      'play_path' :  'mp3:'  +  path
,  170                  # We fallback to the stream_url in the original info, this  171                  # cannot be always used, sometimes it can give an HTTP 404 error  173                      'format_id' :  'fallback' ,  174                      'url' :  info
[ 'stream_url' ] +  '?client_id='  +  self
._ CLIENT
_ ID
,  180                  if  f
[ 'format_id' ]. startswith ( 'http' ):  181                      f
[ 'protocol' ] =  'http'  182                  if  f
[ 'format_id' ]. startswith ( 'rtmp' ):  183                      f
[ 'protocol' ] =  'rtmp'  185              self
._ sort
_ formats
( formats
)  186              result
[ 'formats' ] =  formats
 190      def  _real_extract ( self
,  url
):  191          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  flags
= re
. VERBOSE
)  193              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  195          track_id 
=  mobj
. group ( 'track_id' )  197          if  track_id 
is not None :  198              info_json_url 
=  'http://api.soundcloud.com/tracks/'  +  track_id 
+  '.json?client_id='  +  self
._ CLIENT
_ ID
 199              full_title 
=  track_id
 200          elif  mobj
. group ( 'player' ):  201              query 
=  compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)  202              return  self
. url_result ( query
[ 'url' ][ 0 ])  204              # extract uploader (which is in the url)  205              uploader 
=  mobj
. group ( 'uploader' )  206              # extract simple title (uploader + slug of song title)  207              slug_title 
=   mobj
. group ( 'title' )  208              token 
=  mobj
. group ( 'token' )  209              full_title 
=  resolve_title 
=  ' %s / %s '  % ( uploader
,  slug_title
)  211                  resolve_title 
+=  '/ %s '  %  token
 213              self
. report_resolve ( full_title
)  215              url 
=  'http://soundcloud.com/ %s '  %  resolve_title
 216              info_json_url 
=  self
._ resolv
_u rl
( url
)  217          info 
=  self
._ download
_ json
( info_json_url
,  full_title
,  'Downloading info JSON' )  219          return  self
._ extract
_ info
_ dict
( info
,  full_title
,  secret_token
= token
)  222  class  SoundcloudSetIE ( SoundcloudIE
):  223      _VALID_URL 
=  r
'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'  224      IE_NAME 
=  'soundcloud:set'  225      # it's in tests/test_playlists.py  228      def  _real_extract ( self
,  url
):  229          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  231              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  233          # extract uploader (which is in the url)  234          uploader 
=  mobj
. group ( 1 )  235          # extract simple title (uploader + slug of song title)  236          slug_title 
=  mobj
. group ( 2 )  237          full_title 
=  ' %s /sets/ %s '  % ( uploader
,  slug_title
)  239          self
. report_resolve ( full_title
)  241          url 
=  'http://soundcloud.com/ %s /sets/ %s '  % ( uploader
,  slug_title
)  242          resolv_url 
=  self
._ resolv
_u rl
( url
)  243          info 
=  self
._ download
_ json
( resolv_url
,  full_title
)  246              for  err 
in  info
[ 'errors' ]:  247                  self
._ downloader
. report_error ( 'unable to download video webpage:  %s '  %  compat_str ( err
[ 'error_message' ]))  250          self
. report_extraction ( full_title
)  251          return  { '_type' :  'playlist' ,  252                  'entries' : [ self
._ extract
_ info
_ dict
( track
)  for  track 
in  info
[ 'tracks' ]],  254                  'title' :  info
[ 'title' ],  258  class  SoundcloudUserIE ( SoundcloudIE
):  259      _VALID_URL 
=  r
'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'  260      IE_NAME 
=  'soundcloud:user'  262      # it's in tests/test_playlists.py  265      def  _real_extract ( self
,  url
):  266          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  267          uploader 
=  mobj
. group ( 'user' )  269          url 
=  'http://soundcloud.com/ %s /'  %  uploader
 270          resolv_url 
=  self
._ resolv
_u rl
( url
)  271          user 
=  self
._ download
_ json
(  272              resolv_url
,  uploader
,  'Downloading user info' )  273          base_url 
=  'http://api.soundcloud.com/users/ %s /tracks.json?'  %  uploader
 276          for  i 
in  itertools
. count ():  277              data 
=  compat_urllib_parse
. urlencode ({  279                  'client_id' :  self
._ CLIENT
_ ID
,  281              new_entries 
=  self
._ download
_ json
(  282                  base_url 
+  data
,  uploader
,  'Downloading track page  %s '  % ( i 
+  1 ))  283              entries
. extend ( self
._ extract
_ info
_ dict
( e
,  quiet
= True )  for  e 
in  new_entries
)  284              if  len ( new_entries
) <  50 :  289              'id' :  compat_str ( user
[ 'id' ]),  290              'title' :  user
[ 'username' ],  295  class  SoundcloudPlaylistIE ( SoundcloudIE
):  296      _VALID_URL 
=  r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'  297      IE_NAME 
=  'soundcloud:playlist'  299       # it's in tests/test_playlists.py  302      def  _real_extract ( self
,  url
):  303          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  304          playlist_id 
=  mobj
. group ( 'id' )  305          base_url 
=  ' %s //api.soundcloud.com/playlists/ %s .json?'  % ( self
. http_scheme (),  playlist_id
)  307          data 
=  compat_urllib_parse
. urlencode ({  308              'client_id' :  self
._ CLIENT
_ ID
,  310          data 
=  self
._ download
_ json
(  311              base_url 
+  data
,  playlist_id
,  'Downloading playlist' )  314              self
._ extract
_ info
_ dict
( t
,  quiet
= True )  for  t 
in  data
[ 'tracks' ]]  319              'title' :  data
. get ( 'title' ),  320              'description' :  data
. get ( 'description' ),