Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hypem.py
debian/control: Make determination of python versions automatic.
[youtubedl] / youtube_dl / extractor / hypem.py
1 import json
2 import re
3 import time
4
5 from .common import InfoExtractor
6 from ..utils import (
7 compat_str,
8 compat_urllib_parse,
9 compat_urllib_request,
10
11 ExtractorError,
12 )
13
14
class HypemIE(InfoExtractor):
    """Information Extractor for hypem"""
    # group(1) is used below as the initial track id; the second path
    # component is matched but never read (presumably a title slug).
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        """Resolve a hypem.com track page to a downloadable audio URL.

        Two requests are made: the track page itself (to read the embedded
        ``displayList-data`` JSON and the session cookie), then the
        ``/serve/source/<id>/<key>`` endpoint, whose JSON answer carries the
        final media URL.

        Args:
            url: Track page URL matching ``_VALID_URL``.

        Returns:
            A one-element list holding a dict with the keys ``id``, ``url``,
            ``ext``, ``title`` and ``artist``.

        Raises:
            ExtractorError: If the URL does not match, if either response is
                not valid JSON, or if the expected track / url entries are
                missing from the decoded data.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        # NOTE(review): the meaning of 'ax' and 'ts' is not visible here;
        # presumably they mimic the site's own AJAX page loads -- confirm.
        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        # Do not emit a second '?' when the URL already has a query string.
        separator = '&' if '?' in url else '?'
        complete_url = url + separator + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(
            request, track_id, u'Downloading webpage with the url')
        # The cookie handed out with the page is replayed on the serve
        # request further down.
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(
            r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE | re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        # Valid JSON may still lack the track entry (missing key, empty
        # list, or a non-dict top level); report that as an extractor error
        # rather than letting a raw KeyError/IndexError/TypeError escape.
        try:
            track = track_list[u'tracks'][0]
            key = track[u"key"]
            track_id = track[u"id"]
            artist = track[u"artist"]
            title = track[u"song"]
        except (KeyError, IndexError, TypeError):
            raise ExtractorError(u'Hypemachine did not return usable track data.')

        serve_url = "http://hypem.com/serve/source/%s/%s" % (
            compat_str(track_id), compat_str(key))
        # The empty body turns this into a POST. It must be bytes: Python 3's
        # urllib refuses str POST data, and b'' is still a plain str on
        # Python 2.
        request = compat_urllib_request.Request(
            serve_url, b'', {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(
            request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        try:
            final_url = song_data[u"url"]
        except (KeyError, TypeError):
            raise ExtractorError(u'Hypemachine did not return a media url.')

        return [{
            'id': track_id,
            'url': final_url,
            'ext': "mp3",
            'title': title,
            'artist': artist,
        }]