Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/eighttracks.py

   1 import itertools
   2 import json
   3 import random
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     ExtractorError,
   9 )
  10
  11
  12 class EightTracksIE(InfoExtractor):
  13     IE_NAME = '8tracks'
  14     _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
  15     _TEST = {
  16         u"name": u"EightTracks",
  17         u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
  18         u"playlist": [
  19             {
  20                 u"file": u"11885610.m4a",
  21                 u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
  22                 u"info_dict": {
  23                     u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
  24                     u"uploader_id": u"ytdl"
  25                 }
  26             },
  27             {
  28                 u"file": u"11885608.m4a",
  29                 u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
  30                 u"info_dict": {
  31                     u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
  32                     u"uploader_id": u"ytdl"
  33                 }
  34             },
  35             {
  36                 u"file": u"11885679.m4a",
  37                 u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
  38                 u"info_dict": {
  39                     u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
  40                     u"uploader_id": u"ytdl"
  41                 }
  42             },
  43             {
  44                 u"file": u"11885680.m4a",
  45                 u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
  46                 u"info_dict": {
  47                     u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
  48                     u"uploader_id": u"ytdl"
  49                 }
  50             },
  51             {
  52                 u"file": u"11885682.m4a",
  53                 u"md5": u"1893e872e263a2705558d1d319ad19e8",
  54                 u"info_dict": {
  55                     u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
  56                     u"uploader_id": u"ytdl"
  57                 }
  58             },
  59             {
  60                 u"file": u"11885683.m4a",
  61                 u"md5": u"b673c46f47a216ab1741ae8836af5899",
  62                 u"info_dict": {
  63                     u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
  64                     u"uploader_id": u"ytdl"
  65                 }
  66             },
  67             {
  68                 u"file": u"11885684.m4a",
  69                 u"md5": u"1d74534e95df54986da7f5abf7d842b7",
  70                 u"info_dict": {
  71                     u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
  72                     u"uploader_id": u"ytdl"
  73                 }
  74             },
  75             {
  76                 u"file": u"11885685.m4a",
  77                 u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
  78                 u"info_dict": {
  79                     u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
  80                     u"uploader_id": u"ytdl"
  81                 }
  82             }
  83         ]
  84     }
  85
  86
  87     def _real_extract(self, url):
  88         mobj = re.match(self._VALID_URL, url)
  89         if mobj is None:
  90             raise ExtractorError(u'Invalid URL: %s' % url)
  91         playlist_id = mobj.group('id')
  92
  93         webpage = self._download_webpage(url, playlist_id)
  94
  95         json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
  96         data = json.loads(json_like)
  97
  98         session = str(random.randint(0, 1000000000))
  99         mix_id = data['id']
 100         track_count = data['tracks_count']
 101         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
 102         next_url = first_url
 103         res = []
 104         for i in itertools.count():
 105             api_json = self._download_webpage(next_url, playlist_id,
 106                 note=u'Downloading song information %s/%s' % (str(i+1), track_count),
 107                 errnote=u'Failed to download song information')
 108             api_data = json.loads(api_json)
 109             track_data = api_data[u'set']['track']
 110             info = {
 111                 'id': track_data['id'],
 112                 'url': track_data['track_file_stream_url'],
 113                 'title': track_data['performer'] + u' - ' + track_data['name'],
 114                 'raw_title': track_data['name'],
 115                 'uploader_id': data['user']['login'],
 116                 'ext': 'm4a',
 117             }
 118             res.append(info)
 119             if api_data['set']['at_last_track']:
 120                 break
 121             next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
 122         return res