]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mixcloud.py
8245b5583839c90fc5580785010b9907b1a12088
   5 from .common 
import InfoExtractor
 
  10     compat_urllib_request
, 
  16 class MixcloudIE(InfoExtractor
): 
  17     _WORKING 
= False # New API, but it seems good http://www.mixcloud.com/developers/documentation/ 
  18     _VALID_URL 
= r
'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' 
  21     def report_download_json(self
, file_id
): 
  22         """Report JSON download.""" 
  23         self
.to_screen(u
'Downloading json') 
  25     def get_urls(self
, jsonData
, fmt
, bitrate
='best'): 
  26         """Get urls from 'audio_formats' section in json""" 
  28             bitrate_list 
= jsonData
[fmt
] 
  29             if bitrate 
is None or bitrate 
== 'best' or bitrate 
not in bitrate_list
: 
  30                 bitrate 
= max(bitrate_list
) # select highest 
  32             url_list 
= jsonData
[fmt
][bitrate
] 
  33         except TypeError: # we have no bitrate info. 
  34             url_list 
= jsonData
[fmt
] 
  37     def check_urls(self
, url_list
): 
  38         """Returns 1st active url from list""" 
  41                 compat_urllib_request
.urlopen(url
) 
  43             except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
): 
  48     def _print_formats(self
, formats
): 
  49         print('Available formats:') 
  50         for fmt 
in formats
.keys(): 
  51             for b 
in formats
[fmt
]: 
  53                     ext 
= formats
[fmt
][b
][0] 
  54                     print('%s\t%s\t[%s]' % (fmt
, b
, ext
.split('.')[-1])) 
  55                 except TypeError: # we have no bitrate info 
  57                     print('%s\t%s\t[%s]' % (fmt
, '??', ext
.split('.')[-1])) 
  60     def _real_extract(self
, url
): 
  61         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  63             raise ExtractorError(u
'Invalid URL: %s' % url
) 
  64         # extract uploader & filename from url 
  65         uploader 
= mobj
.group(1).decode('utf-8') 
  66         file_id 
= uploader 
+ "-" + mobj
.group(2).decode('utf-8') 
  68         # construct API request 
  69         file_url 
= 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url
.split('/')[-3:-1]) + '.json' 
  70         # retrieve .json file with links to files 
  71         request 
= compat_urllib_request
.Request(file_url
) 
  73             self
.report_download_json(file_url
) 
  74             jsonData 
= compat_urllib_request
.urlopen(request
).read() 
  75         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  76             raise ExtractorError(u
'Unable to retrieve file: %s' % compat_str(err
)) 
  79         json_data 
= json
.loads(jsonData
) 
  80         player_url 
= json_data
['player_swf_url'] 
  81         formats 
= dict(json_data
['audio_formats']) 
  83         req_format 
= self
._downloader
.params
.get('format', None) 
  85         if self
._downloader
.params
.get('listformats', None): 
  86             self
._print
_formats
(formats
) 
  89         if req_format 
is None or req_format 
== 'best': 
  90             for format_param 
in formats
.keys(): 
  91                 url_list 
= self
.get_urls(formats
, format_param
) 
  93                 file_url 
= self
.check_urls(url_list
) 
  94                 if file_url 
is not None: 
  97             if req_format 
not in formats
: 
  98                 raise ExtractorError(u
'Format is not available') 
 100             url_list 
= self
.get_urls(formats
, req_format
) 
 101             file_url 
= self
.check_urls(url_list
) 
 102             format_param 
= req_format
 
 105             'id': file_id
.decode('utf-8'), 
 106             'url': file_url
.decode('utf-8'), 
 107             'uploader': uploader
.decode('utf-8'), 
 109             'title': json_data
['name'], 
 110             'ext': file_url
.split('.')[-1].decode('utf-8'), 
 111             'format': (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
 112             'thumbnail': json_data
['thumbnail_url'], 
 113             'description': json_data
['description'], 
 114             'player_url': player_url
.decode('utf-8'),