Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hitbox.py
debian/control: Mark compliance with policy 4.1.1. No further changes needed.
[youtubedl] / youtube_dl / extractor / hitbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 clean_html,
9 parse_iso8601,
10 float_or_none,
11 int_or_none,
12 compat_str,
13 determine_ext,
14 )
15
16
class HitboxIE(InfoExtractor):
    """Extractor for numeric-id video pages on hitbox.tv / smashcast.tv."""

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
        'only_matching': True,
    }]

    def _extract_metadata(self, url, video_id):
        """Download the media JSON at ``url/video_id`` and map it to info-dict fields.

        ``url`` is the API endpoint prefix and ``video_id`` is appended to it
        as the last path component. The returned dict carries id, title,
        alt_title, description, ext, thumbnails, duration, uploader,
        view_count, timestamp and categories; callers add ``formats``.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')

        # Recorded videos and livestreams live under different top-level keys
        # and use a different field for their date.
        if metadata.get('media_type') == 'video':
            media_type, date = 'video', 'media_date_added'
        else:
            media_type, date = 'livestream', 'media_live_since'

        # A missing or empty media list used to raise IndexError here. Fall
        # back to an empty record so the lookups below yield None instead
        # (assumes the ..utils helpers pass None through -- TODO confirm).
        video_meta = (metadata.get(media_type) or [{}])[0]

        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Do not emit [None] when the category is absent.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Only list thumbnails whose path is present; concatenating None onto
        # the base URL would raise TypeError.
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a video URL."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        formats = []
        for video in player_config['clip']['bitrates']:
            label = video.get('label')
            # Entries labelled 'Auto' and entries without a URL are skipped.
            if label == 'Auto':
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            fmt = {
                'url': video_url,
                'tbr': int_or_none(video.get('bitrate')),
                'format_note': label,
            }
            if determine_ext(video_url) == 'm3u8':
                # Playlists that are not absolute http(s) URLs are dropped.
                if not video_url.startswith('http'):
                    continue
                fmt.update({
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                })
            formats.append(fmt)
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/video', video_id)
        metadata['formats'] = formats

        return metadata
131
132
class HitboxLiveIE(HitboxIE):
    """Extractor for channel (live stream) pages on hitbox.tv / smashcast.tv."""

    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }, {
        'url': 'https://www.smashcast.tv/dimak',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor handles ``url``.

        URLs that HitboxIE accepts are always left to HitboxIE, since the
        looser channel pattern of this class would otherwise claim them too.
        """
        if HitboxIE.suitable(url):
            return False
        return super(HitboxLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a live channel URL."""
        video_id = self._match_id(url)

        player_config = self._download_json(
            'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        # 'cdns' and 'bitrates' may be missing; iterating None would raise
        # TypeError, so treat them as empty.
        for cdn in player_config.get('cdns') or []:
            # Subscribe URLs are not playable
            if cdn.get('rtmpSubscribe') is True:
                continue
            base_url = cdn.get('netConnectionUrl')
            # The host is only used as a label for RTMP formats, so a missing
            # or unexpected base URL must not abort the whole extraction.
            host_match = re.search(
                r'.+\.([^\.]+\.[^\./]+)/.+', base_url) if base_url else None
            host = host_match.group(1) if host_match else None
            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                stream_url = stream.get('url')
                if not stream_url:
                    continue
                bitrate = int_or_none(stream.get('bitrate'))
                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
                    # Playlists that are not absolute http(s) URLs are dropped.
                    if not stream_url.startswith('http'):
                        continue
                    # NOTE(review): 'rtmp_live' on an HLS format looks like a
                    # carry-over from the RTMP branch -- kept as-is, confirm.
                    formats.append({
                        'url': stream_url,
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'format_note': label,
                        'rtmp_live': True,
                    })
                elif base_url:
                    # Non-HLS stream URLs are relative to the CDN connection
                    # URL; without one there is nothing playable to build.
                    formats.append({
                        'url': '%s/%s' % (base_url, stream_url),
                        'ext': 'mp4',
                        'tbr': bitrate,
                        'rtmp_live': True,
                        'format_note': host,
                        'page_url': url,
                        'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                    })
        self._sort_formats(formats)

        metadata = self._extract_metadata(
            'https://www.smashcast.tv/api/media/live', video_id)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))

        return metadata