class MixcloudBaseIE(InfoExtractor):
    """Common base for the Mixcloud extractors; wraps the GraphQL endpoint."""

    def _call_api(self, object_type, object_fields, display_id, username, slug=None):
        """Look up a single Mixcloud object through the GraphQL endpoint.

        object_type   -- prefix of the lookup field; 'Lookup' is appended to
                         it to form the queried field name
        object_fields -- GraphQL selection set requested for the object
        display_id    -- identifier handed to _download_json for the request
        username      -- value of the lookup's 'username' argument
        slug          -- optional value of the lookup's 'slug' argument; it
                         is left out of the query when falsy

        Returns the value stored under ['data'][<object_type> + 'Lookup'] in
        the JSON response.
        """
        # Function-scope import so this block does not depend on the
        # module-level import list.
        import json

        def quote(value):
            # username/slug originate from the (percent-decoded) URL, so they
            # may contain '"', '\\' or line breaks.  A JSON string literal is
            # also a valid GraphQL string literal, so json.dumps gives us the
            # surrounding quotes plus correct escaping.  ensure_ascii=False
            # keeps non-ASCII characters as they are instead of turning them
            # into \uXXXX escapes.
            return json.dumps(value, ensure_ascii=False)

        lookup_key = object_type + 'Lookup'
        lookup_args = 'username: %s' % quote(username)
        if slug:
            lookup_args += ', slug: %s' % quote(slug)

        response = self._download_json(
            'https://www.mixcloud.com/graphql', display_id, query={
                'query': '''{
  %s(lookup: {%s}) {
    %s
  }
}''' % (lookup_key, lookup_args, object_fields)
            })
        return response['data'][lookup_key]
@staticmethod
def _decrypt_xor_cipher(key, ciphertext):
    """Apply a repeating-key XOR to ciphertext and return the joined result.

    Each element of ciphertext is XOR-ed with the matching element of key;
    the key is cycled so it can be shorter than the input.  XOR is its own
    inverse, so the same call serves for encrypting and for decrypting.
    """
    key_stream = itertools.cycle(key)
    plain_chars = []
    for cipher_ch, key_ch in compat_zip(ciphertext, key_stream):
        code_point = compat_ord(cipher_ch) ^ compat_ord(key_ch)
        plain_chars.append(compat_chr(code_point))
    return ''.join(plain_chars)
return url - except ExtractorError: - url = None + title = cloudcast['name'] - return None + stream_info = cloudcast['streamInfo'] + formats = [] - def _get_url(self, template_url): - return self.check_urls(template_url % i for i in range(30)) + for url_key in ('url', 'hlsUrl', 'dashUrl'): + format_url = stream_info.get(url_key) + if not format_url: + continue + decrypted = self._decrypt_xor_cipher( + self._DECRYPTION_KEY, compat_b64decode(format_url)) + if url_key == 'hlsUrl': + formats.extend(self._extract_m3u8_formats( + decrypted, track_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif url_key == 'dashUrl': + formats.extend(self._extract_mpd_formats( + decrypted, track_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'format_id': 'http', + 'url': decrypted, + 'downloader_options': { + # Mixcloud starts throttling at >~5M + 'http_chunk_size': 5242880, + }, + }) - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader = mobj.group(1) - cloudcast_name = mobj.group(2) - track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name))) - - webpage = self._download_webpage(url, track_id) - - preview_url = self._search_regex( - r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') - song_url = preview_url.replace('/previews/', '/c/originals/') - template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) - final_song_url = self._get_url(template_url) - if final_song_url is None: - self.to_screen('Trying with m4a extension') - template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') - final_song_url = self._get_url(template_url) - if final_song_url is None: - raise ExtractorError('Unable to extract track url') - - PREFIX = ( - r'