Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hypem.py

   1 import json
   2 import re
   3 import time
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     compat_str,
   8     compat_urllib_parse,
   9     compat_urllib_request,
  10
  11     ExtractorError,
  12 )
  13
  14
  15 class HypemIE(InfoExtractor):
  16     """Information Extractor for hypem"""
  17     _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
  18
  19     def _real_extract(self, url):
  20         mobj = re.match(self._VALID_URL, url)
  21         if mobj is None:
  22             raise ExtractorError(u'Invalid URL: %s' % url)
  23         track_id = mobj.group(1)
  24
  25         data = { 'ax': 1, 'ts': time.time() }
  26         data_encoded = compat_urllib_parse.urlencode(data)
  27         complete_url = url + "?" + data_encoded
  28         request = compat_urllib_request.Request(complete_url)
  29         response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
  30         cookie = urlh.headers.get('Set-Cookie', '')
  31
  32         self.report_extraction(track_id)
  33
  34         html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
  35             response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
  36         try:
  37             track_list = json.loads(html_tracks)
  38             track = track_list[u'tracks'][0]
  39         except ValueError:
  40             raise ExtractorError(u'Hypemachine contained invalid JSON.')
  41
  42         key = track[u"key"]
  43         track_id = track[u"id"]
  44         artist = track[u"artist"]
  45         title = track[u"song"]
  46
  47         serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
  48         request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
  49         request.add_header('cookie', cookie)
  50         song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
  51         try:
  52             song_data = json.loads(song_data_json)
  53         except ValueError:
  54             raise ExtractorError(u'Hypemachine contained invalid JSON.')
  55         final_url = song_data[u"url"]
  56
  57         return [{
  58             'id':       track_id,
  59             'url':      final_url,
  60             'ext':      "mp3",
  61             'title':    title,
  62             'artist':   artist,
  63         }]