1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_urllib_parse_unquote
 
  14 class MixcloudIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' 
  19         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', 
  21             'id': 'dholbach-cryptkeeper', 
  23             'title': 'Cryptkeeper', 
  24             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 
  25             'uploader': 'Daniel Holbach', 
  26             'uploader_id': 'dholbach', 
  27             'thumbnail': 're:https?://.*\.jpg', 
  32         'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', 
  34             'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', 
  36             'title': 'Caribou 7 inch Vinyl Mix & Chat', 
  37             'description': 'md5:2b8aec6adce69f9d41724647c65875e8', 
  38             'uploader': 'Gilles Peterson Worldwide', 
  39             'uploader_id': 'gillespeterson', 
  40             'thumbnail': 're:https?://.*/images/', 
  46     def _check_url(self
, url
, track_id
, ext
): 
                 # Probe `url` by sending a HEADRequest through
                 # self._request_webpage, passing `track_id` and the string
                 # 'Trying %s URL' % ext as the remaining arguments.
                 # NOTE(review): this listing skips the statements numbered 47, 53
                 # and 55. The `except` clause below needs a matching `try:`, and
                 # _real_extract tests the result with `if not self._check_url(...)`,
                 # so presumably the missing lines are `try:`, a truthy return on
                 # success and a falsy return in the handler -- confirm against
                 # the complete file before relying on this.
  48             # We only want to know whether the request succeeds; 
  49             # don't download the whole file. 
  50             self
._request
_webpage
( 
  51                 HEADRequest(url
), track_id
, 
  52                 'Trying %s URL' % ext
) 
                 # An ExtractorError raised by the probe is caught here; the
                 # handler body is not visible in this listing.
  54         except ExtractorError
: 
  57     def _real_extract(self
, url
): 
  58         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  59         uploader 
= mobj
.group(1) 
  60         cloudcast_name 
= mobj
.group(2) 
  61         track_id 
= compat_urllib_parse_unquote('-'.join((uploader
, cloudcast_name
))) 
  63         webpage 
= self
._download
_webpage
(url
, track_id
) 
  65         preview_url 
= self
._search
_regex
( 
  66             r
'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage
, 'preview url') 
  67         song_url 
= re
.sub(r
'audiocdn(\d+)', r
'stream\1', preview_url
) 
  68         song_url 
= song_url
.replace('/previews/', '/c/originals/') 
  69         if not self
._check
_url
(song_url
, track_id
, 'mp3'): 
  70             song_url 
= song_url
.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') 
  71             if not self
._check
_url
(song_url
, track_id
, 'm4a'): 
  72                 raise ExtractorError('Unable to extract track url') 
  75             r
'm-play-on-spacebar[^>]+' 
  76             r
'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') 
  77         title 
= self
._html
_search
_regex
( 
  78             PREFIX 
+ r
'm-title="([^"]+)"', webpage
, 'title') 
  79         thumbnail 
= self
._proto
_relative
_url
(self
._html
_search
_regex
( 
  80             PREFIX 
+ r
'm-thumbnail-url="([^"]+)"', webpage
, 'thumbnail', 
  82         uploader 
= self
._html
_search
_regex
( 
  83             PREFIX 
+ r
'm-owner-name="([^"]+)"', 
  84             webpage
, 'uploader', fatal
=False) 
  85         uploader_id 
= self
._search
_regex
( 
  86             r
'\s+"profile": "([^"]+)",', webpage
, 'uploader id', fatal
=False) 
  87         description 
= self
._og
_search
_description
(webpage
) 
  88         like_count 
= str_to_int(self
._search
_regex
( 
  89             r
'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"', 
  90             webpage
, 'like count', fatal
=False)) 
  91         view_count 
= str_to_int(self
._search
_regex
( 
  92             [r
'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', 
  93              r
'/listeners/?">([0-9,.]+)</a>'], 
  94             webpage
, 'play count', fatal
=False)) 
 100             'description': description
, 
 101             'thumbnail': thumbnail
, 
 102             'uploader': uploader
, 
 103             'uploader_id': uploader_id
, 
 104             'view_count': view_count
, 
 105             'like_count': like_count
,