]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/grooveshark.py
fff74a70a891fc163ff488408e4df348564b8a29
   2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  11 from ..compat 
import ( 
  14     compat_urllib_request
, 
  17 from ..utils 
import ExtractorError
 
  20 class GroovesharkHtmlParser(compat_html_parser
.HTMLParser
): 
  22         self
._current
_object 
= None 
  24         compat_html_parser
.HTMLParser
.__init
__(self
) 
  26     def handle_starttag(self
, tag
, attrs
): 
  27         attrs 
= dict((k
, v
) for k
, v 
in attrs
) 
  29             self
._current
_object 
= {'attrs': attrs
, 'params': []} 
  31             self
._current
_object
['params'].append(attrs
) 
  33     def handle_endtag(self
, tag
): 
  35             self
.objects
.append(self
._current
_object
) 
  36             self
._current
_object 
= None 
  39     def extract_object_tags(cls
, html
): 
  46 class GroovesharkIE(InfoExtractor
): 
  47     _VALID_URL 
= r
'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)' 
  49         'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5', 
  50         'md5': '7ecf8aefa59d6b2098517e1baa530023', 
  53             'title': 'Jolene (Tenth Key Remix ft. Will Sessions)', 
  59     do_playerpage_request 
= True 
  60     do_bootstrap_request 
= True 
  62     def _parse_target(self
, target
): 
  63         uri 
= compat_urlparse
.urlparse(target
) 
  64         hash = uri
.fragment
[1:].split('?')[0] 
  65         token 
= os
.path
.basename(hash.rstrip('/')) 
  66         return (uri
, hash, token
) 
  68     def _build_bootstrap_url(self
, target
): 
  69         (uri
, hash, token
) = self
._parse
_target
(target
) 
  70         query 
= 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse
.quote(hash, safe
=''), self
.ts
) 
  71         return (compat_urlparse
.urlunparse((uri
.scheme
, uri
.netloc
, '/preload.php', None, query
, None)), token
) 
  73     def _build_meta_url(self
, target
): 
  74         (uri
, hash, token
) = self
._parse
_target
(target
) 
  75         query 
= 'hash=%s&%d' % (compat_urllib_parse
.quote(hash, safe
=''), self
.ts
) 
  76         return (compat_urlparse
.urlunparse((uri
.scheme
, uri
.netloc
, '/preload.php', None, query
, None)), token
) 
  78     def _build_stream_url(self
, meta
): 
  79         return compat_urlparse
.urlunparse(('http', meta
['streamKey']['ip'], '/stream.php', None, None, None)) 
  81     def _build_swf_referer(self
, target
, obj
): 
  82         (uri
, _
, _
) = self
._parse
_target
(target
) 
  83         return compat_urlparse
.urlunparse((uri
.scheme
, uri
.netloc
, obj
['attrs']['data'], None, None, None)) 
  85     def _transform_bootstrap(self
, js
): 
  86         return re
.split('(?m)^\s*try\s*{', js
)[0] \
 
  87                  .split(' = ', 1)[1].strip().rstrip(';') 
  89     def _transform_meta(self
, js
): 
  90         return js
.split('\n')[0].split('=')[1].rstrip(';') 
  92     def _get_meta(self
, target
): 
  93         (meta_url
, token
) = self
._build
_meta
_url
(target
) 
  94         self
.to_screen('Metadata URL: %s' % meta_url
) 
  96         headers 
= {'Referer': compat_urlparse
.urldefrag(target
)[0]} 
  97         req 
= compat_urllib_request
.Request(meta_url
, headers
=headers
) 
  98         res 
= self
._download
_json
(req
, token
, 
  99                                   transform_source
=self
._transform
_meta
) 
 101         if 'getStreamKeyWithSong' not in res
: 
 102             raise ExtractorError( 
 103                 'Metadata not found. URL may be malformed, or Grooveshark API may have changed.') 
 105         if res
['getStreamKeyWithSong'] is None: 
 106             raise ExtractorError( 
 107                 'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.', 
 110         return res
['getStreamKeyWithSong'] 
 112     def _get_bootstrap(self
, target
): 
 113         (bootstrap_url
, token
) = self
._build
_bootstrap
_url
(target
) 
 115         headers 
= {'Referer': compat_urlparse
.urldefrag(target
)[0]} 
 116         req 
= compat_urllib_request
.Request(bootstrap_url
, headers
=headers
) 
 117         res 
= self
._download
_json
(req
, token
, fatal
=False, 
 118                                   note
='Downloading player bootstrap data', 
 119                                   errnote
='Unable to download player bootstrap data', 
 120                                   transform_source
=self
._transform
_bootstrap
) 
 123     def _get_playerpage(self
, target
): 
 124         (_
, _
, token
) = self
._parse
_target
(target
) 
 126         webpage 
= self
._download
_webpage
( 
 128             note
='Downloading player page', 
 129             errnote
='Unable to download player page', 
 132         if webpage 
is not None: 
 133             # Search (for example German) error message 
 134             error_msg 
= self
._html
_search
_regex
( 
 135                 r
'<div id="content">\s*<h2>(.*?)</h2>', webpage
, 
 136                 'error message', default
=None) 
 137             if error_msg 
is not None: 
 138                 error_msg 
= error_msg
.replace('\n', ' ') 
 139                 raise ExtractorError('Grooveshark said: %s' % error_msg
) 
 141         if webpage 
is not None: 
 142             o 
= GroovesharkHtmlParser
.extract_object_tags(webpage
) 
 143             return (webpage
, [x 
for x 
in o 
if x
['attrs']['id'] == 'jsPlayerEmbed']) 
 145         return (webpage
, None) 
 147     def _real_initialize(self
): 
 148         self
.ts 
= int(time
.time() * 1000)  # timestamp in millis 
 150     def _real_extract(self
, url
): 
 151         (target_uri
, _
, token
) = self
._parse
_target
(url
) 
 153         # 1. Fill cookiejar by making a request to the player page 
 155         if self
.do_playerpage_request
: 
 156             (_
, player_objs
) = self
._get
_playerpage
(url
) 
 157             if player_objs 
is not None: 
 158                 swf_referer 
= self
._build
_swf
_referer
(url
, player_objs
[0]) 
 159                 self
.to_screen('SWF Referer: %s' % swf_referer
) 
 161         # 2. Ask preload.php for swf bootstrap data to better mimic webapp 
 162         if self
.do_bootstrap_request
: 
 163             bootstrap 
= self
._get
_bootstrap
(url
) 
 164             self
.to_screen('CommunicationToken: %s' % bootstrap
['getCommunicationToken']) 
 166         # 3. Ask preload.php for track metadata. 
 167         meta 
= self
._get
_meta
(url
) 
 169         # 4. Construct stream request for track. 
 170         stream_url 
= self
._build
_stream
_url
(meta
) 
 171         duration 
= int(math
.ceil(float(meta
['streamKey']['uSecs']) / 1000000)) 
 172         post_dict 
= {'streamKey': meta
['streamKey']['streamKey']} 
 173         post_data 
= compat_urllib_parse
.urlencode(post_dict
).encode('utf-8') 
 175             'Content-Length': len(post_data
), 
 176             'Content-Type': 'application/x-www-form-urlencoded' 
 178         if swf_referer 
is not None: 
 179             headers
['Referer'] = swf_referer
 
 183             'title': meta
['song']['Name'], 
 184             'http_method': 'POST', 
 187             'format': 'mp3 audio', 
 188             'duration': duration
, 
 189             'http_post_data': post_data
, 
 190             'http_headers': headers
,