Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hypem.py
Merge tag 'upstream/2013.08.08'
[youtubedl] / youtube_dl / extractor / hypem.py
1 import json
2 import re
3 import time
4
5 from .common import InfoExtractor
6 from ..utils import (
7 compat_str,
8 compat_urllib_parse,
9 compat_urllib_request,
10
11 ExtractorError,
12 )
13
14
class HypemIE(InfoExtractor):
    """Information Extractor for hypem"""

    # First group is used as the initial track id; the second group (the
    # remaining path component) is matched but not used by this extractor.
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
    _TEST = {
        u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
        u'file': u'1v6ga.mp3',
        u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
        u'info_dict': {
            u"title": u"Tame"
        }
    }

    def _real_extract(self, url):
        """Resolve a hypem track page URL to a downloadable mp3 entry.

        Downloads the track page, reads the first track from the JSON
        embedded in the ``displayList-data`` script tag, then asks the
        ``/serve/source/<id>/<key>`` endpoint for the final media URL.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title`` and ``artist``.

        Raises ExtractorError if the URL does not match ``_VALID_URL`` or
        if either JSON document is malformed or lacks an expected field.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        # NOTE(review): the meaning of 'ax' is not visible here; 'ts' is the
        # current time. Both are sent as query parameters with the page fetch.
        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        # _VALID_URL is not anchored at the end, so the URL may already carry
        # a query string; extend it rather than adding a second '?'.
        separator = '&' if '?' in url else '?'
        complete_url = url + separator + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(
            request, track_id, u'Downloading webpage with the url')
        # The cookie set by the page response is replayed on the serve
        # request below (presumably the endpoint requires it -- confirm).
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(
            r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE | re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
            track = track_list[u'tracks'][0]
            key = track[u"key"]
            track_id = track[u"id"]
            artist = track[u"artist"]
            title = track[u"song"]
        except (ValueError, KeyError, IndexError, TypeError):
            # ValueError: not JSON at all; the others: valid JSON whose
            # shape is not {"tracks": [{"key", "id", "artist", "song"}, ...]}.
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        serve_url = "http://hypem.com/serve/source/%s/%s" % (
            compat_str(track_id), compat_str(key))
        # A non-None body turns this into a POST. It must be a byte string:
        # Python 3's urllib rejects str bodies, and b"" is the same object
        # type as "" on Python 2.
        request = compat_urllib_request.Request(
            serve_url, b"", {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(
            request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
            final_url = song_data[u"url"]
        except (ValueError, KeyError, TypeError):
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        return [{
            'id': track_id,
            'url': final_url,
            'ext': "mp3",
            'title': title,
            'artist': artist,
        }]