]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mixcloud.py
8245b5583839c90fc5580785010b9907b1a12088
5 from .common
import InfoExtractor
10 compat_urllib_request
,
16 class MixcloudIE(InfoExtractor
):
17 _WORKING
= False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
18 _VALID_URL
= r
'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
def report_download_json(self, file_id):
    """Report that the JSON metadata download is starting.

    Args:
        file_id: Identifier of the item being fetched. It is accepted for
            interface compatibility but is not included in the message.
    """
    # The message is fixed; ``file_id`` is intentionally unused here.
    self.to_screen(u'Downloading json')
def get_urls(self, jsonData, fmt, bitrate='best'):
    """Get urls from the 'audio_formats' section of the JSON data.

    Args:
        jsonData: Mapping indexed by format name. Each value is either
            indexable by bitrate (yielding the URLs for that bitrate) or,
            when no bitrate information exists, the URL collection itself.
        fmt: Format name to look up in ``jsonData``.
        bitrate: Requested bitrate key. ``None``, ``'best'`` or a key that
            is not present selects the highest available one.

    Returns:
        The URL collection for the chosen format/bitrate.
    """
    try:
        bitrate_list = jsonData[fmt]
        if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
            # Select highest.
            # NOTE(review): if the keys are strings, max() compares them
            # lexicographically, not numerically -- confirm key type.
            bitrate = max(bitrate_list)
        url_list = jsonData[fmt][bitrate]
    except TypeError:
        # We have no bitrate info: the entry cannot be indexed by bitrate,
        # so the entry itself is the URL collection.
        url_list = jsonData[fmt]
    return url_list
def check_urls(self, url_list):
    """Return the first url from the list that can be opened.

    Each candidate is probed by opening it; candidates that fail with a
    URL, HTTP or socket error are skipped.

    Args:
        url_list: Iterable of candidate URLs, tried in order.

    Returns:
        The first URL that opened successfully, or ``None`` when none did
        (including when ``url_list`` is empty).
    """
    for url in url_list:
        try:
            response = compat_urllib_request.urlopen(url)
        except (compat_urllib_error.URLError,
                compat_http_client.HTTPException,
                socket.error):
            # This candidate is unreachable; try the next one.
            continue
        # Only reachability matters, so release the connection right away
        # instead of leaving the response open.
        response.close()
        return url
    return None
48 def _print_formats(self
, formats
):
49 print('Available formats:')
50 for fmt
in formats
.keys():
51 for b
in formats
[fmt
]:
53 ext
= formats
[fmt
][b
][0]
54 print('%s\t%s\t[%s]' % (fmt
, b
, ext
.split('.')[-1]))
55 except TypeError: # we have no bitrate info
57 print('%s\t%s\t[%s]' % (fmt
, '??', ext
.split('.')[-1]))
60 def _real_extract(self
, url
):
61 mobj
= re
.match(self
._VALID
_URL
, url
)
63 raise ExtractorError(u
'Invalid URL: %s' % url
)
64 # extract uploader & filename from url
65 uploader
= mobj
.group(1).decode('utf-8')
66 file_id
= uploader
+ "-" + mobj
.group(2).decode('utf-8')
68 # construct API request
69 file_url
= 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url
.split('/')[-3:-1]) + '.json'
70 # retrieve .json file with links to files
71 request
= compat_urllib_request
.Request(file_url
)
73 self
.report_download_json(file_url
)
74 jsonData
= compat_urllib_request
.urlopen(request
).read()
75 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
76 raise ExtractorError(u
'Unable to retrieve file: %s' % compat_str(err
))
79 json_data
= json
.loads(jsonData
)
80 player_url
= json_data
['player_swf_url']
81 formats
= dict(json_data
['audio_formats'])
83 req_format
= self
._downloader
.params
.get('format', None)
85 if self
._downloader
.params
.get('listformats', None):
86 self
._print
_formats
(formats
)
89 if req_format
is None or req_format
== 'best':
90 for format_param
in formats
.keys():
91 url_list
= self
.get_urls(formats
, format_param
)
93 file_url
= self
.check_urls(url_list
)
94 if file_url
is not None:
97 if req_format
not in formats
:
98 raise ExtractorError(u
'Format is not available')
100 url_list
= self
.get_urls(formats
, req_format
)
101 file_url
= self
.check_urls(url_list
)
102 format_param
= req_format
105 'id': file_id
.decode('utf-8'),
106 'url': file_url
.decode('utf-8'),
107 'uploader': uploader
.decode('utf-8'),
109 'title': json_data
['name'],
110 'ext': file_url
.split('.')[-1].decode('utf-8'),
111 'format': (format_param
is None and u
'NA' or format_param
.decode('utf-8')),
112 'thumbnail': json_data
['thumbnail_url'],
113 'description': json_data
['description'],
114 'player_url': player_url
.decode('utf-8'),