Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/thesixtyone.py
debian/control: Mark compliance with policy 4.0.1.
[youtubedl] / youtube_dl / extractor / thesixtyone.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import unified_strdate
6
7
class TheSixtyOneIE(InfoExtractor):
    """Information extractor for song pages on thesixtyone.com.

    The song id is taken from the URL, the canonical ``/s/<id>`` page is
    downloaded, and the JSON object stored under that id in the page is used
    to build both the audio stream URL and the metadata of the result.
    """

    # Verbose-mode pattern ((?x)): whitespace and newlines inside it are
    # ignored by the regex engine.  Any number of leading path segments
    # (including '#/...' fragments, see _TESTS) may precede the ``s``,
    # ``song`` or ``song/comments/list`` marker; one optional extra segment
    # may sit between the marker and the alphanumeric id.
    _VALID_URL = r'''(?x)https?://(?:www\.)?thesixtyone\.com/
        (?:.*?/)*
        (?:
            s|
            song/comments/list|
            song
        )/(?:[^/]+/)?(?P<id>[A-Za-z0-9]+)/?$'''

    # Positional field {0:} is the song id / decoded key; the named fields
    # are filled from the song data dictionary.
    _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
    _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream'
    _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'

    _TESTS = [
        {
            'url': 'http://www.thesixtyone.com/s/SrE3zD7s1jt/',
            'md5': '821cc43b0530d3222e3e2b70bb4622ea',
            'info_dict': {
                'id': 'SrE3zD7s1jt',
                'ext': 'mp3',
                'title': 'CASIO - Unicorn War Mixtape',
                'thumbnail': 're:^https?://.*_desktop$',
                'upload_date': '20071217',
                'duration': 3208,
            },
        },
        # The remaining entries only check that _VALID_URL accepts the URL.
        {'url': 'http://www.thesixtyone.com/song/comments/list/SrE3zD7s1jt', 'only_matching': True},
        {'url': 'http://www.thesixtyone.com/s/ULoiyjuJWli#/s/SrE3zD7s1jt/', 'only_matching': True},
        {'url': 'http://www.thesixtyone.com/#/s/SrE3zD7s1jt/', 'only_matching': True},
        {'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/', 'only_matching': True},
        {
            'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/',
            'only_matching': True,
        },
    ]

    # Character substitution applied to the song's 'key' field: the sixteen
    # letters on the first line map, position by position, onto the
    # characters on the second line.  Characters not listed here are passed
    # through unchanged by _real_extract.
    _DECODE_MAP = dict(zip(
        'xmwqnpaheusioyrc',
        'abcdef0123456789'))

    def _real_extract(self, url):
        """Build the info dictionary for the song referenced by *url*.

        The song data embedded in the page must provide ``key``, ``artist``,
        ``name`` and ``photo_base_url``, plus ``audio_server`` unless the
        page carries the ``t61.s3_audio_load = 1.0;`` marker.  The fields
        ``comments_count``, ``play_time``, ``score`` and ``publish_date``
        are optional.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``comment_count``, ``duration``, ``like_count``, ``thumbnail`` and
        ``upload_date``.
        """
        song_id = self._match_id(url)

        # Always fetch the canonical /s/<id> page, whatever URL form was given.
        page_url = self._SONG_URL_TEMPLATE.format(song_id)
        webpage = self._download_webpage(page_url, song_id)

        # The page embeds a JSON object keyed by the song id.
        raw_song_json = self._search_regex(
            r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data')
        song_data = self._parse_json(raw_song_json, song_id)

        # When the marker is present the audio is served from S3; otherwise
        # the server named in the song data is a thesixtyone.com subdomain.
        s3_marker = self._search_regex(
            r'(t61\.s3_audio_load\s*=\s*1\.0;)', webpage,
            's3_audio_load marker', default=None)
        song_data['audio_server'] = (
            's3.amazonaws.com' if s3_marker
            else song_data['audio_server'] + '.thesixtyone.com')

        # The stream file name is the key with every character substituted
        # through _DECODE_MAP and the result reversed.
        decoded_chars = [self._DECODE_MAP.get(ch, ch) for ch in song_data['key']]
        decoded_chars.reverse()
        audio_url = self._SONG_FILE_URL_TEMPLATE.format(
            "".join(decoded_chars), **song_data)

        title = '{artist:} - {name:}'.format(**song_data)
        thumbnail = self._THUMBNAIL_URL_TEMPLATE.format(**song_data)
        upload_date = unified_strdate(song_data.get('publish_date'))

        return {
            'id': song_id,
            'title': title,
            'formats': [{
                'format_id': 'sd',
                'url': audio_url,
                'ext': 'mp3',
            }],
            'comment_count': song_data.get('comments_count'),
            'duration': song_data.get('play_time'),
            'like_count': song_data.get('score'),
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }