]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/grooveshark.py
726adff773305844a5e8950b4467509646b79d1d
   2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  11 from ..utils 
import ExtractorError
, compat_urllib_request
, compat_html_parser
 
  19 class GroovesharkHtmlParser(compat_html_parser
.HTMLParser
): 
  21         self
._current
_object 
= None 
  23         compat_html_parser
.HTMLParser
.__init
__(self
) 
  25     def handle_starttag(self
, tag
, attrs
): 
  26         attrs 
= dict((k
, v
) for k
, v 
in attrs
) 
  28             self
._current
_object 
= {'attrs': attrs
, 'params': []} 
  30             self
._current
_object
['params'].append(attrs
) 
  32     def handle_endtag(self
, tag
): 
  34             self
.objects
.append(self
._current
_object
) 
  35             self
._current
_object 
= None 
  38     def extract_object_tags(cls
, html
): 
  45 class GroovesharkIE(InfoExtractor
): 
  46     _VALID_URL 
= r
'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)' 
  48         'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5', 
  49         'md5': '7ecf8aefa59d6b2098517e1baa530023', 
  52             'title': 'Jolene (Tenth Key Remix ft. Will Sessions)', 
  58     do_playerpage_request 
= True 
  59     do_bootstrap_request 
= True 
  61     def _parse_target(self
, target
): 
  62         uri 
= compat_urlparse
.urlparse(target
) 
  63         hash = uri
.fragment
[1:].split('?')[0] 
  64         token 
= os
.path
.basename(hash.rstrip('/')) 
  65         return (uri
, hash, token
) 
  67     def _build_bootstrap_url(self
, target
): 
  68         (uri
, hash, token
) = self
._parse
_target
(target
) 
  69         query 
= 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse
.quote(hash, safe
=''), self
.ts
) 
  70         return (compat_urlparse
.urlunparse((uri
.scheme
, uri
.netloc
, '/preload.php', None, query
, None)), token
) 
  72     def _build_meta_url(self
, target
): 
  73         (uri
, hash, token
) = self
._parse
_target
(target
) 
  74         query 
= 'hash=%s&%d' % (compat_urllib_parse
.quote(hash, safe
=''), self
.ts
) 
  75         return (compat_urlparse
.urlunparse((uri
.scheme
, uri
.netloc
, '/preload.php', None, query
, None)), token
) 
  77     def _build_stream_url(self
, meta
): 
  78         return compat_urlparse
.urlunparse(('http', meta
['streamKey']['ip'], '/stream.php', None, None, None)) 
  80     def _build_swf_referer(self
, target
, obj
): 
  81         (uri
, _
, _
) = self
._parse
_target
(target
) 
  82         return compat_urlparse
.urlunparse((uri
.scheme
, uri
.netloc
, obj
['attrs']['data'], None, None, None)) 
  84     def _transform_bootstrap(self
, js
): 
  85         return re
.split('(?m)^\s*try\s*{', js
)[0] \
 
  86                  .split(' = ', 1)[1].strip().rstrip(';') 
  88     def _transform_meta(self
, js
): 
  89         return js
.split('\n')[0].split('=')[1].rstrip(';') 
  91     def _get_meta(self
, target
): 
  92         (meta_url
, token
) = self
._build
_meta
_url
(target
) 
  93         self
.to_screen('Metadata URL: %s' % meta_url
) 
  95         headers 
= {'Referer': compat_urlparse
.urldefrag(target
)[0]} 
  96         req 
= compat_urllib_request
.Request(meta_url
, headers
=headers
) 
  97         res 
= self
._download
_json
(req
, token
, 
  98                                   transform_source
=self
._transform
_meta
) 
 100         if 'getStreamKeyWithSong' not in res
: 
 101             raise ExtractorError( 
 102                 'Metadata not found. URL may be malformed, or Grooveshark API may have changed.') 
 104         if res
['getStreamKeyWithSong'] is None: 
 105             raise ExtractorError( 
 106                 'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.', 
 109         return res
['getStreamKeyWithSong'] 
 111     def _get_bootstrap(self
, target
): 
 112         (bootstrap_url
, token
) = self
._build
_bootstrap
_url
(target
) 
 114         headers 
= {'Referer': compat_urlparse
.urldefrag(target
)[0]} 
 115         req 
= compat_urllib_request
.Request(bootstrap_url
, headers
=headers
) 
 116         res 
= self
._download
_json
(req
, token
, fatal
=False, 
 117                                   note
='Downloading player bootstrap data', 
 118                                   errnote
='Unable to download player bootstrap data', 
 119                                   transform_source
=self
._transform
_bootstrap
) 
 122     def _get_playerpage(self
, target
): 
 123         (_
, _
, token
) = self
._parse
_target
(target
) 
 125         webpage 
= self
._download
_webpage
( 
 127             note
='Downloading player page', 
 128             errnote
='Unable to download player page', 
 131         if webpage 
is not None: 
 132             # Search (for example German) error message 
 133             error_msg 
= self
._html
_search
_regex
( 
 134                 r
'<div id="content">\s*<h2>(.*?)</h2>', webpage
, 
 135                 'error message', default
=None) 
 136             if error_msg 
is not None: 
 137                 error_msg 
= error_msg
.replace('\n', ' ') 
 138                 raise ExtractorError('Grooveshark said: %s' % error_msg
) 
 140         if webpage 
is not None: 
 141             o 
= GroovesharkHtmlParser
.extract_object_tags(webpage
) 
 142             return (webpage
, [x 
for x 
in o 
if x
['attrs']['id'] == 'jsPlayerEmbed']) 
 144         return (webpage
, None) 
 146     def _real_initialize(self
): 
 147         self
.ts 
= int(time
.time() * 1000)  # timestamp in millis 
 149     def _real_extract(self
, url
): 
 150         (target_uri
, _
, token
) = self
._parse
_target
(url
) 
 152         # 1. Fill cookiejar by making a request to the player page 
 154         if self
.do_playerpage_request
: 
 155             (_
, player_objs
) = self
._get
_playerpage
(url
) 
 156             if player_objs 
is not None: 
 157                 swf_referer 
= self
._build
_swf
_referer
(url
, player_objs
[0]) 
 158                 self
.to_screen('SWF Referer: %s' % swf_referer
) 
 160         # 2. Ask preload.php for swf bootstrap data to better mimic webapp 
 161         if self
.do_bootstrap_request
: 
 162             bootstrap 
= self
._get
_bootstrap
(url
) 
 163             self
.to_screen('CommunicationToken: %s' % bootstrap
['getCommunicationToken']) 
 165         # 3. Ask preload.php for track metadata. 
 166         meta 
= self
._get
_meta
(url
) 
 168         # 4. Construct stream request for track. 
 169         stream_url 
= self
._build
_stream
_url
(meta
) 
 170         duration 
= int(math
.ceil(float(meta
['streamKey']['uSecs']) / 1000000)) 
 171         post_dict 
= {'streamKey': meta
['streamKey']['streamKey']} 
 172         post_data 
= compat_urllib_parse
.urlencode(post_dict
).encode('utf-8') 
 174             'Content-Length': len(post_data
), 
 175             'Content-Type': 'application/x-www-form-urlencoded' 
 177         if swf_referer 
is not None: 
 178             headers
['Referer'] = swf_referer
 
 182             'title': meta
['song']['Name'], 
 183             'http_method': 'POST', 
 186             'format': 'mp3 audio', 
 187             'duration': duration
, 
 188             'http_post_data': post_data
, 
 189             'http_headers': headers
,