]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mixcloud.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  17 class MixcloudIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' 
  22         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', 
  24             'id': 'dholbach-cryptkeeper', 
  26             'title': 'Cryptkeeper', 
  27             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 
  28             'uploader': 'Daniel Holbach', 
  29             'uploader_id': 'dholbach', 
  30             'upload_date': '20111115', 
  31             'timestamp': 1321359578, 
  32             'thumbnail': 're:https?://.*\.jpg', 
  37         'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', 
  39             'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', 
  41             'title': 'Electric Relaxation vol. 3', 
  42             'description': 'md5:2b8aec6adce69f9d41724647c65875e8', 
  43             'uploader': 'Daniel Drumz', 
  44             'uploader_id': 'gillespeterson', 
  45             'thumbnail': 're:https?://.*\.jpg', 
  51     def _get_url(self
, track_id
, template_url
): 
  53         for i 
in range(server_count
): 
  54             url 
= template_url 
% i
 
  56                 # We only want to know if the request succeed 
  57                 # don't download the whole file 
  58                 self
._request
_webpage
( 
  59                     HEADRequest(url
), track_id
, 
  60                     'Checking URL %d/%d ...' % (i 
+ 1, server_count 
+ 1)) 
  62             except ExtractorError
: 
  67     def _real_extract(self
, url
): 
  68         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  69         uploader 
= mobj
.group(1) 
  70         cloudcast_name 
= mobj
.group(2) 
  71         track_id 
= compat_urllib_parse
.unquote('-'.join((uploader
, cloudcast_name
))) 
  73         webpage 
= self
._download
_webpage
(url
, track_id
) 
  75         preview_url 
= self
._search
_regex
( 
  76             r
'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage
, 'preview url') 
  77         song_url 
= preview_url
.replace('/previews/', '/c/originals/') 
  78         template_url 
= re
.sub(r
'(stream\d*)', 'stream%d', song_url
) 
  79         final_song_url 
= self
._get
_url
(track_id
, template_url
) 
  80         if final_song_url 
is None: 
  81             self
.to_screen('Trying with m4a extension') 
  82             template_url 
= template_url
.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') 
  83             final_song_url 
= self
._get
_url
(track_id
, template_url
) 
  84         if final_song_url 
is None: 
  85             raise ExtractorError('Unable to extract track url') 
  88             r
'<span class="play-button[^"]*?"' 
  89             r
'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') 
  90         title 
= self
._html
_search
_regex
( 
  91             PREFIX 
+ r
'm-title="([^"]+)"', webpage
, 'title') 
  92         thumbnail 
= self
._proto
_relative
_url
(self
._html
_search
_regex
( 
  93             PREFIX 
+ r
'm-thumbnail-url="([^"]+)"', webpage
, 'thumbnail', 
  95         uploader 
= self
._html
_search
_regex
( 
  96             PREFIX 
+ r
'm-owner-name="([^"]+)"', 
  97             webpage
, 'uploader', fatal
=False) 
  98         uploader_id 
= self
._search
_regex
( 
  99             r
'\s+"profile": "([^"]+)",', webpage
, 'uploader id', fatal
=False) 
 100         description 
= self
._og
_search
_description
(webpage
) 
 101         like_count 
= str_to_int(self
._search
_regex
( 
 102             [r
'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', 
 103              r
'/favorites/?">([0-9]+)<'], 
 104             webpage
, 'like count', fatal
=False)) 
 105         view_count 
= str_to_int(self
._search
_regex
( 
 106             [r
'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', 
 107              r
'/listeners/?">([0-9,.]+)</a>'], 
 108             webpage
, 'play count', fatal
=False)) 
 109         timestamp 
= parse_iso8601(self
._search
_regex
( 
 110             r
'<time itemprop="dateCreated" datetime="([^"]+)">', 
 111             webpage
, 'upload date', default
=None)) 
 116             'url': final_song_url
, 
 117             'description': description
, 
 118             'thumbnail': thumbnail
, 
 119             'uploader': uploader
, 
 120             'uploader_id': uploader_id
, 
 121             'timestamp': timestamp
, 
 122             'view_count': view_count
, 
 123             'like_count': like_count
,