]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soundcloud.py 
5d60c4939588ad543840b501ef0e552ad0b1e673
   2  from  __future__ 
import  unicode_literals
   7  from  . common 
import  InfoExtractor
  20  class  SoundcloudIE ( InfoExtractor
):   21      """Information extractor for soundcloud.com   22         To access the media, the uid of the song and a stream token   23         must be extracted from the page source and the script must make   24         a request to media.soundcloud.com/crossdomain.xml. Then   25         the media can be grabbed by requesting from an url composed   26         of the stream token and uid   29      _VALID_URL 
=  r
'''(?x)^(?:https?://)?   30                      (?:(?:(?:www\.|m\.)?soundcloud\.com/   31                              (?P<uploader>[\w\d-]+)/   32                              (?!sets/|likes/?(?:$|[?#]))   34                              (?P<token>[^?]+?)?(?:[?].*)?$)   35                         |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)   36                            (?:/?\?secret_token=(?P<secret_token>[^&]+))?)   37                         |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)   40      IE_NAME 
=  'soundcloud'   43              'url' :  'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy' ,   44              'md5' :  'ebef0a451b909710ed1d7787dddbf0d7' ,   48                  'upload_date' :  '20121011' ,   49                  'description' :  'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o \' d' ,   50                  'uploader' :  'E.T. ExTerrestrial Music' ,   51                  'title' :  'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1' ,   57              'url' :  'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep' ,   61                  'title' :  'Goldrushed' ,   62                  'description' :  'From Stockholm Sweden \r\n Povel / Magnus / Filip / David \r\n www.theroyalconcept.com' ,   63                  'uploader' :  'The Royal Concept' ,   64                  'upload_date' :  '20120521' ,   69                  'skip_download' :  True ,   74              'url' :  'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp' ,   75              'md5' :  'aa0dd32bfea9b0c5ef4f02aacd080604' ,   79                  'title' :  'Youtube - Dl Test Video  \'\'  Ä↭' ,   80                  'uploader' :  'jaimeMF' ,   81                  'description' :  'test chars:   \"\' / \\ ä↭' ,   82                  'upload_date' :  '20131209' ,   86          # private link (alt format)   88              'url' :  'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp' ,   89              'md5' :  'aa0dd32bfea9b0c5ef4f02aacd080604' ,   93                  'title' :  'Youtube - Dl Test Video  \'\'  Ä↭' ,   94                  'uploader' :  'jaimeMF' ,   95                  'description' :  'test chars:   \"\' / \\ ä↭' ,   96                  'upload_date' :  '20131209' ,  102              'url' :  'https://soundcloud.com/oddsamples/bus-brakes' ,  103              'md5' :  '7624f2351f8a3b2e7cd51522496e7631' ,  107                  'title' :  'Bus Brakes' ,  108                  'description' :  'md5:0053ca6396e8d2fd7b7e1595ef12ab66' ,  109                  'uploader' :  'oddsamples' ,  110                  'upload_date' :  '20140109' ,  116      _CLIENT_ID 
=  'b45b1aa10f1ac2941910a7f0d10f8e28'  117      _IPHONE_CLIENT_ID 
=  '376f225bf427445fc4bfb6b99b72e0bf'  119      def  report_resolve ( self
,  video_id
):  120          """Report information extraction."""  121          self
. to_screen ( ' %s : Resolving id'  %  video_id
)  124      def  _resolv_url ( cls
,  url
):  125          return  'http://api.soundcloud.com/resolve.json?url='  +  url 
+  '&client_id='  +  cls
._ CLIENT
_ ID
 127      def  _extract_info_dict ( self
,  info
,  full_title
= None ,  quiet
= False ,  secret_token
= None ):  128          track_id 
=  compat_str ( info
[ 'id' ])  129          name 
=  full_title 
or  track_id
 131              self
. report_extraction ( name
)  133          thumbnail 
=  info
[ 'artwork_url' ]  134          if  thumbnail 
is not None :  135              thumbnail 
=  thumbnail
. replace ( '-large' ,  '-t500x500' )  139              'uploader' :  info
[ 'user' ][ 'username' ],  140              'upload_date' :  unified_strdate ( info
[ 'created_at' ]),  141              'title' :  info
[ 'title' ],  142              'description' :  info
[ 'description' ],  143              'thumbnail' :  thumbnail
,  144              'duration' :  int_or_none ( info
. get ( 'duration' ),  1000 ),  145              'webpage_url' :  info
. get ( 'permalink_url' ),  148          if  info
. get ( 'downloadable' ,  False ):  149              # We can build a direct link to the song  151                  'https://api.soundcloud.com/tracks/ {0} /download?client_id= {1} ' . format (  152                      track_id
,  self
._ CLIENT
_ ID
))  154                  'format_id' :  'download' ,  155                  'ext' :  info
. get ( 'original_format' ,  'mp3' ),  161          # We have to retrieve the url  162          streams_url 
= ( 'http://api.soundcloud.com/i1/tracks/ {0} /streams?'  163                         'client_id= {1} &secret_token= {2} ' . format ( track_id
,  self
._ IPHONE
_ CLIENT
_ ID
,  secret_token
))  164          format_dict 
=  self
._ download
_ json
(  166              track_id
,  'Downloading track url' )  168          for  key
,  stream_url 
in  format_dict
. items ():  169              if  key
. startswith ( 'http' ):  176              elif  key
. startswith ( 'rtmp' ):  177                  # The url doesn't have an rtmp app, we have to extract the playpath  178                  url
,  path 
=  stream_url
. split ( 'mp3:' ,  1 )  182                      'play_path' :  'mp3:'  +  path
,  188                  # We fallback to the stream_url in the original info, this  189                  # cannot be always used, sometimes it can give an HTTP 404 error  191                      'format_id' :  'fallback' ,  192                      'url' :  info
[ 'stream_url' ] +  '?client_id='  +  self
._ CLIENT
_ ID
,  198                  if  f
[ 'format_id' ]. startswith ( 'http' ):  199                      f
[ 'protocol' ] =  'http'  200                  if  f
[ 'format_id' ]. startswith ( 'rtmp' ):  201                      f
[ 'protocol' ] =  'rtmp'  203              self
._ sort
_ formats
( formats
)  204              result
[ 'formats' ] =  formats
 208      def  _real_extract ( self
,  url
):  209          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  flags
= re
. VERBOSE
)  211              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  213          track_id 
=  mobj
. group ( 'track_id' )  215          if  track_id 
is not None :  216              info_json_url 
=  'http://api.soundcloud.com/tracks/'  +  track_id 
+  '.json?client_id='  +  self
._ CLIENT
_ ID
 217              full_title 
=  track_id
 218              token 
=  mobj
. group ( 'secret_token' )  220                  info_json_url 
+=  "&secret_token="  +  token
 221          elif  mobj
. group ( 'player' ):  222              query 
=  compat_urlparse
. parse_qs ( compat_urlparse
. urlparse ( url
). query
)  223              return  self
. url_result ( query
[ 'url' ][ 0 ])  225              # extract uploader (which is in the url)  226              uploader 
=  mobj
. group ( 'uploader' )  227              # extract simple title (uploader + slug of song title)  228              slug_title 
=  mobj
. group ( 'title' )  229              token 
=  mobj
. group ( 'token' )  230              full_title 
=  resolve_title 
=  ' %s / %s '  % ( uploader
,  slug_title
)  232                  resolve_title 
+=  '/ %s '  %  token
 234              self
. report_resolve ( full_title
)  236              url 
=  'http://soundcloud.com/ %s '  %  resolve_title
 237              info_json_url 
=  self
._ resolv
_u rl
( url
)  238          info 
=  self
._ download
_ json
( info_json_url
,  full_title
,  'Downloading info JSON' )  240          return  self
._ extract
_ info
_ dict
( info
,  full_title
,  secret_token
= token
)  243  class  SoundcloudSetIE ( SoundcloudIE
):  244      _VALID_URL 
=  r
'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'  245      IE_NAME 
=  'soundcloud:set'  247          'url' :  'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep' ,  249              'title' :  'The Royal Concept EP' ,  251          'playlist_mincount' :  6 ,  254      def  _real_extract ( self
,  url
):  255          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  257          # extract uploader (which is in the url)  258          uploader 
=  mobj
. group ( 'uploader' )  259          # extract simple title (uploader + slug of song title)  260          slug_title 
=  mobj
. group ( 'slug_title' )  261          full_title 
=  ' %s /sets/ %s '  % ( uploader
,  slug_title
)  262          url 
=  'http://soundcloud.com/ %s /sets/ %s '  % ( uploader
,  slug_title
)  264          token 
=  mobj
. group ( 'token' )  266              full_title 
+=  '/'  +  token
 269          self
. report_resolve ( full_title
)  271          resolv_url 
=  self
._ resolv
_u rl
( url
)  272          info 
=  self
._ download
_ json
( resolv_url
,  full_title
)  275              for  err 
in  info
[ 'errors' ]:  276                  self
._ downloader
. report_error ( 'unable to download video webpage:  %s '  %  compat_str ( err
[ 'error_message' ]))  281              'entries' : [ self
._ extract
_ info
_ dict
( track
,  secret_token
= token
)  for  track 
in  info
[ 'tracks' ]],  283              'title' :  info
[ 'title' ],  287  class  SoundcloudUserIE ( SoundcloudIE
):  288      _VALID_URL 
=  r
'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'  289      IE_NAME 
=  'soundcloud:user'  291          'url' :  'https://soundcloud.com/the-concept-band' ,  294              'title' :  'The Royal Concept' ,  296          'playlist_mincount' :  12  298          'url' :  'https://soundcloud.com/the-concept-band/likes' ,  301              'title' :  'The Royal Concept' ,  303          'playlist_mincount' :  1 ,  306      def  _real_extract ( self
,  url
):  307          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  308          uploader 
=  mobj
. group ( 'user' )  309          resource 
=  mobj
. group ( 'rsrc' )  312          elif  resource 
==  'likes' :  313              resource 
=  'favorites'  315          url 
=  'http://soundcloud.com/ %s /'  %  uploader
 316          resolv_url 
=  self
._ resolv
_u rl
( url
)  317          user 
=  self
._ download
_ json
(  318              resolv_url
,  uploader
,  'Downloading user info' )  319          base_url 
=  'http://api.soundcloud.com/users/ %s / %s .json?'  % ( uploader
,  resource
)  322          for  i 
in  itertools
. count ():  323              data 
=  compat_urllib_parse
. urlencode ({  326                  'client_id' :  self
._ CLIENT
_ ID
,  328              new_entries 
=  self
._ download
_ json
(  329                  base_url 
+  data
,  uploader
,  'Downloading track page  %s '  % ( i 
+  1 ))  330              if  len ( new_entries
) ==  0 :  331                  self
. to_screen ( ' %s : End page received'  %  uploader
)  333              entries
. extend ( self
._ extract
_ info
_ dict
( e
,  quiet
= True )  for  e 
in  new_entries
)  337              'id' :  compat_str ( user
[ 'id' ]),  338              'title' :  user
[ 'username' ],  343  class  SoundcloudPlaylistIE ( SoundcloudIE
):  344      _VALID_URL 
=  r
'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'  345      IE_NAME 
=  'soundcloud:playlist'  347          'url' :  'http://api.soundcloud.com/playlists/4110309' ,  350              'title' :  'TILT Brass - Bowery Poetry Club, August  \' 03 [Non-Site SCR 02]' ,  351              'description' :  're:.*?TILT Brass - Bowery Poetry Club' ,  356      def  _real_extract ( self
,  url
):  357          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  358          playlist_id 
=  mobj
. group ( 'id' )  359          base_url 
=  ' %s //api.soundcloud.com/playlists/ %s .json?'  % ( self
. http_scheme (),  playlist_id
)  362              'client_id' :  self
._ CLIENT
_ ID
,  364          token 
=  mobj
. group ( 'token' )  367              data_dict
[ 'secret_token' ] =  token
 369          data 
=  compat_urllib_parse
. urlencode ( data_dict
)  370          data 
=  self
._ download
_ json
(  371              base_url 
+  data
,  playlist_id
,  'Downloading playlist' )  374              self
._ extract
_ info
_ dict
( t
,  quiet
= True ,  secret_token
= token
)  375              for  t 
in  data
[ 'tracks' ]]  380              'title' :  data
. get ( 'title' ),  381              'description' :  data
. get ( 'description' ),