Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vidme.py
debian/control: Update list of extractors in long description.
[youtubedl] / youtube_dl / extractor / vidme.py
1 from __future__ import unicode_literals
2
3 import itertools
4
5 from .common import InfoExtractor
6 from ..compat import compat_HTTPError
7 from ..utils import (
8 ExtractorError,
9 int_or_none,
10 float_or_none,
11 parse_iso8601,
12 url_or_none,
13 )
14
15
class VidmeIE(InfoExtractor):
    """Extractor for single vid.me videos (``vid.me/<id>`` and ``vid.me/e/<id>``).

    Metadata and formats are read from the ``videoByUrl`` JSON API endpoint.
    """

    IE_NAME = 'vidme'
    # NOTE(review): ``{,5}`` also admits an empty id (a bare
    # ``https://vid.me/`` matches); the pattern is kept unchanged so that URL
    # matching behaves exactly as before.
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
    _TESTS = [{
        'url': 'https://vid.me/QNB',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
            'title': 'Fishing for piranha - the easy way',
            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1406313244,
            'upload_date': '20140725',
            'age_limit': 0,
            'duration': 119.92,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'https://vid.me/Gc6M',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'Gc6M',
            'ext': 'mp4',
            'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1441211642,
            'upload_date': '20150902',
            'uploader': 'SunshineM',
            'uploader_id': '3552827',
            'age_limit': 0,
            'duration': 223.72,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # tests uploader field
        'url': 'https://vid.me/4Iib',
        'info_dict': {
            'id': '4Iib',
            'ext': 'mp4',
            'title': 'The Carver',
            'description': 'md5:e9c24870018ae8113be936645b93ba3c',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1433203629,
            'upload_date': '20150602',
            'uploader': 'Thomas',
            'uploader_id': '109747',
            'age_limit': 0,
            'duration': 97.859999999999999,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
        'url': 'https://vid.me/e/Wmur',
        'info_dict': {
            'id': 'Wmur',
            'ext': 'mp4',
            'title': 'naked smoking & stretching',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1430931613,
            'upload_date': '20150506',
            'uploader': 'naked-yogi',
            'uploader_id': '1638622',
            'age_limit': 18,
            'duration': 653.26999999999998,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # nsfw, user-disabled
        'url': 'https://vid.me/dzGJ',
        'only_matching': True,
    }, {
        # suspended
        'url': 'https://vid.me/Ox3G',
        'only_matching': True,
    }, {
        # deleted
        'url': 'https://vid.me/KTPm',
        'only_matching': True,
    }, {
        # no formats in the API response
        'url': 'https://vid.me/e5g',
        'info_dict': {
            'id': 'e5g',
            'ext': 'mp4',
            'title': 'Video upload (e5g)',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1401480195,
            'upload_date': '20140530',
            'uploader': None,
            'uploader_id': None,
            'age_limit': 0,
            'duration': 483,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Raises ExtractorError (marked as expected) when the API response
        carries an ``error`` field or the video state is ``deleted``,
        ``user-disabled`` or ``suspended``.
        """
        video_id = self._match_id(url)

        try:
            response = self._download_json(
                'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
        except ExtractorError as e:
            # The body of an HTTP 400 reply is parsed as JSON so that its
            # 'error' field can be reported below; anything else propagates.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                # read() yields bytes; decode explicitly because json.loads
                # only accepts bytes on Python 3.6+.
                response = self._parse_json(
                    e.cause.read().decode('utf-8', 'replace'), video_id)
            else:
                raise

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        video = response['video']

        state = video.get('state')
        if state == 'deleted':
            raise ExtractorError(
                'Vidme said: Sorry, this video has been deleted.',
                expected=True)

        if state in ('user-disabled', 'suspended'):
            raise ExtractorError(
                'Vidme said: This video has been suspended either due to a copyright claim, '
                'or for violating the terms of use.',
                expected=True)

        formats = []
        # The ``or []`` / ``or ''`` / ``or {}`` fallbacks in this method cover
        # keys that are present in the response but set to null, which a
        # plain ``.get(key, default)`` would pass through as None.
        for f in video.get('formats') or []:
            format_url = url_or_none(f.get('uri'))
            if not format_url:
                continue
            format_type = f.get('type')
            if format_type == 'dash':
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, mpd_id='dash', fatal=False))
            elif format_type == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'format_id': format_type,
                    'url': format_url,
                    'width': int_or_none(f.get('width')),
                    'height': int_or_none(f.get('height')),
                    # Types ending in 'clip' are ranked below the others.
                    'preference': 0 if (format_type or '').endswith(
                        'clip') else 1,
                })

        # Fall back to the top-level URL when the format list gave nothing.
        if not formats and video.get('complete_url'):
            formats.append({
                'url': video.get('complete_url'),
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
            })

        self._sort_formats(formats)

        # A missing title is handled like an empty one: the placeholder
        # title built in the return value is used instead of a KeyError.
        title = video.get('title')
        description = video.get('description')
        thumbnail = video.get('thumbnail_url')
        timestamp = parse_iso8601(video.get('date_created'), ' ')
        user = video.get('user') or {}
        uploader = user.get('username')
        uploader_id = user.get('user_id')
        age_limit = 18 if video.get('nsfw') is True else 0
        duration = float_or_none(video.get('duration'))
        view_count = int_or_none(video.get('view_count'))
        like_count = int_or_none(video.get('likes_count'))
        comment_count = int_or_none(video.get('comment_count'))

        return {
            'id': video_id,
            'title': title or 'Video upload (%s)' % video_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'age_limit': age_limit,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'formats': formats,
        }
224
225
class VidmeListBaseIE(InfoExtractor):
    """Shared logic for vid.me per-user video listings.

    Concrete subclasses are expected to define ``_API_ITEM`` (the path
    segment placed after ``videos/`` in the API URL) and ``_TITLE`` (the
    suffix appended to the user name in the playlist title).
    """

    # Max possible limit according to https://docs.vid.me/#api-Videos-List
    _LIMIT = 100

    def _entries(self, user_id, user_name):
        """Yield a url_result for every video of the listing, page by page.

        Paging stops at the first page without videos, or once the reported
        total (when there is one) has been covered.
        """
        for page_num in itertools.count(1):
            offset = (page_num - 1) * self._LIMIT
            page = self._download_json(
                'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
                % (self._API_ITEM, user_id, self._LIMIT, offset),
                user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))

            videos = page.get('videos')
            if not videos:
                break

            for video in videos:
                video_url = video.get('full_url') or video.get('embed_url')
                if video_url:
                    yield self.url_result(video_url, VidmeIE.ie_key())

            # 'page' may be present but null; treat that like a missing key
            # instead of failing on None.get().
            paging = page.get('page') or {}
            total = int_or_none(paging.get('total'))
            if total and self._LIMIT * page_num >= total:
                break

    def _real_extract(self, url):
        """Resolve the user name in *url* to a user id and return the playlist."""
        user_name = self._match_id(url)

        user_id = self._download_json(
            'https://api.vid.me/userByUsername?username=%s' % user_name,
            user_name)['user']['user_id']

        return self.playlist_result(
            self._entries(user_id, user_name), user_id,
            '%s - %s' % (user_name, self._TITLE))
260
261
class VidmeUserIE(VidmeListBaseIE):
    """Playlist extractor for a vid.me user page (``vid.me/<user>``)."""

    IE_NAME = 'vidme:user'
    _API_ITEM = 'list'
    _TITLE = 'Videos'
    # The id must be at least six characters long, and the negative lookahead
    # rejects URLs where the name is directly followed by ``/likes``.
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'
    _TESTS = [
        {
            'url': 'https://vid.me/MasakoX',
            'info_dict': {
                'id': '16112341',
                'title': 'MasakoX - %s' % _TITLE,
            },
            'playlist_mincount': 191,
        },
        {
            'url': 'https://vid.me/unsQuare_netWork',
            'only_matching': True,
        },
    ]
278
279
class VidmeUserLikesIE(VidmeListBaseIE):
    """Playlist extractor for a vid.me user's likes page (``vid.me/<user>/likes``)."""

    IE_NAME = 'vidme:user:likes'
    _API_ITEM = 'likes'
    _TITLE = 'Likes'
    # Same user-name pattern as the user page, but the URL must continue
    # with ``/likes``.
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'
    _TESTS = [
        {
            'url': 'https://vid.me/ErinAlexis/likes',
            'info_dict': {
                'id': '6483530',
                'title': 'ErinAlexis - %s' % _TITLE,
            },
            'playlist_mincount': 415,
        },
        {
            'url': 'https://vid.me/Kaleidoscope-Ish/likes',
            'only_matching': True,
        },
    ]