Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/hypem.py
debian/copyright: Use HTTPS for upstream's source.
[youtubedl] / youtube_dl / extractor / hypem.py
index ceec4f616e6b77f6766261b4cdde1761213a5ae5..f7c9130540e51a75a83052704d61403b488b25f6 100644 (file)
@@ -1,63 +1,61 @@
+from __future__ import unicode_literals
+
 import json
-import re
 import time
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
-    compat_str,
-    compat_urllib_parse,
-    compat_urllib_request,
-
     ExtractorError,
+    sanitized_Request,
 )
 
 
 class HypemIE(InfoExtractor):
-    """Information Extractor for hypem"""
-    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
+    _TEST = {
+        'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
+        'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
+        'info_dict': {
+            'id': '1v6ga',
+            'ext': 'mp3',
+            'title': 'Tame',
+            'uploader': 'BODYWORK',
+        }
+    }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        track_id = mobj.group(1)
-
-        data = { 'ax': 1, 'ts': time.time() }
-        data_encoded = compat_urllib_parse.urlencode(data)
-        complete_url = url + "?" + data_encoded
-        request = compat_urllib_request.Request(complete_url)
-        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
-        cookie = urlh.headers.get('Set-Cookie', '')
-
-        self.report_extraction(track_id)
-
-        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
-            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
-        try:
-            track_list = json.loads(html_tracks)
-            track = track_list[u'tracks'][0]
-        except ValueError:
-            raise ExtractorError(u'Hypemachine contained invalid JSON.')
+        track_id = self._match_id(url)
 
-        key = track[u"key"]
-        track_id = track[u"id"]
-        artist = track[u"artist"]
-        title = track[u"song"]
+        data = {'ax': 1, 'ts': time.time()}
+        request = sanitized_Request(url + '?' + compat_urllib_parse_urlencode(data))
+        response, urlh = self._download_webpage_handle(
+            request, track_id, 'Downloading webpage with the url')
 
-        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
-        request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
-        request.add_header('cookie', cookie)
-        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
+        html_tracks = self._html_search_regex(
+            r'(?ms)<script type="application/json" id="displayList-data">(.+?)</script>',
+            response, 'tracks')
         try:
-            song_data = json.loads(song_data_json)
+            track_list = json.loads(html_tracks)
+            track = track_list['tracks'][0]
         except ValueError:
-            raise ExtractorError(u'Hypemachine contained invalid JSON.')
-        final_url = song_data[u"url"]
-
-        return [{
-            'id':       track_id,
-            'url':      final_url,
-            'ext':      "mp3",
-            'title':    title,
-            'artist':   artist,
-        }]
\ No newline at end of file
+            raise ExtractorError('Hypemachine contained invalid JSON.')
+
+        key = track['key']
+        track_id = track['id']
+        title = track['song']
+
+        request = sanitized_Request(
+            'http://hypem.com/serve/source/%s/%s' % (track_id, key),
+            '', {'Content-Type': 'application/json'})
+        song_data = self._download_json(request, track_id, 'Downloading metadata')
+        final_url = song_data['url']
+        artist = track.get('artist')
+
+        return {
+            'id': track_id,
+            'url': final_url,
+            'ext': 'mp3',
+            'title': title,
+            'uploader': artist,
+        }