Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vidme.py
New upstream version 2017.09.24
[youtubedl] / youtube_dl / extractor / vidme.py
1 from __future__ import unicode_literals
2
3 import itertools
4
5 from .common import InfoExtractor
6 from ..compat import (
7 compat_HTTPError,
8 compat_str,
9 )
10 from ..utils import (
11 ExtractorError,
12 int_or_none,
13 float_or_none,
14 parse_iso8601,
15 )
16
17
class VidmeIE(InfoExtractor):
    """Extractor for a single vid.me video page or embed (``/e/``) URL."""

    IE_NAME = 'vidme'
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
    _TESTS = [{
        'url': 'https://vid.me/QNB',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
            'title': 'Fishing for piranha - the easy way',
            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1406313244,
            'upload_date': '20140725',
            'age_limit': 0,
            'duration': 119.92,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'https://vid.me/Gc6M',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'Gc6M',
            'ext': 'mp4',
            'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1441211642,
            'upload_date': '20150902',
            'uploader': 'SunshineM',
            'uploader_id': '3552827',
            'age_limit': 0,
            'duration': 223.72,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # tests uploader field
        'url': 'https://vid.me/4Iib',
        'info_dict': {
            'id': '4Iib',
            'ext': 'mp4',
            'title': 'The Carver',
            'description': 'md5:e9c24870018ae8113be936645b93ba3c',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1433203629,
            'upload_date': '20150602',
            'uploader': 'Thomas',
            'uploader_id': '109747',
            'age_limit': 0,
            'duration': 97.859999999999999,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
        'url': 'https://vid.me/e/Wmur',
        'info_dict': {
            'id': 'Wmur',
            'ext': 'mp4',
            'title': 'naked smoking & stretching',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1430931613,
            'upload_date': '20150506',
            'uploader': 'naked-yogi',
            'uploader_id': '1638622',
            'age_limit': 18,
            'duration': 653.26999999999998,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # nsfw, user-disabled
        'url': 'https://vid.me/dzGJ',
        'only_matching': True,
    }, {
        # suspended
        'url': 'https://vid.me/Ox3G',
        'only_matching': True,
    }, {
        # deleted
        'url': 'https://vid.me/KTPm',
        'only_matching': True,
    }, {
        # no formats in the API response
        'url': 'https://vid.me/e5g',
        'info_dict': {
            'id': 'e5g',
            'ext': 'mp4',
            'title': 'Video upload (e5g)',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1401480195,
            'upload_date': '20140530',
            'uploader': None,
            'uploader_id': None,
            'age_limit': 0,
            'duration': 483,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Query the vid.me API for ``url`` and build the info dict.

        Raises ExtractorError (expected=True) when the API response carries
        an ``error`` field or the video state is ``deleted``,
        ``user-disabled`` or ``suspended``.
        """
        video_id = self._match_id(url)

        try:
            response = self._download_json(
                'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
        except ExtractorError as e:
            # A 400 response body is parsed as JSON so that its 'error'
            # field (if any) can be reported below; anything else is
            # re-raised untouched.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                response = self._parse_json(e.cause.read(), video_id)
            else:
                raise

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        video = response['video']
        state = video.get('state')

        if state == 'deleted':
            raise ExtractorError(
                'Vidme said: Sorry, this video has been deleted.',
                expected=True)

        if state in ('user-disabled', 'suspended'):
            raise ExtractorError(
                'Vidme said: This video has been suspended either due to a copyright claim, '
                'or for violating the terms of use.',
                expected=True)

        formats = []
        # `or []` rather than a .get() default: the default is only used
        # when the key is absent, not when the API sends an explicit null.
        for f in video.get('formats') or []:
            if not isinstance(f, dict):
                continue
            format_url = f.get('uri')
            if not format_url or not isinstance(format_url, compat_str):
                continue
            format_type = f.get('type')
            if format_type == 'dash':
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, mpd_id='dash', fatal=False))
            elif format_type == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'format_id': format_type,
                    'url': format_url,
                    'width': int_or_none(f.get('width')),
                    'height': int_or_none(f.get('height')),
                    # Formats whose type ends in 'clip' rank below the rest;
                    # a null type is treated as a non-clip format.
                    'preference': 0 if (format_type or '').endswith(
                        'clip') else 1,
                })

        # Fall back to the single 'complete_url' when no per-format entry
        # was usable.
        if not formats and video.get('complete_url'):
            formats.append({
                'url': video.get('complete_url'),
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
            })

        self._sort_formats(formats)

        title = video['title']
        # 'user' may be present but null (the e5g test above expects
        # uploader/uploader_id to be None), so guard with `or {}`.
        user = video.get('user') or {}

        return {
            'id': video_id,
            'title': title or 'Video upload (%s)' % video_id,
            'description': video.get('description'),
            'thumbnail': video.get('thumbnail_url'),
            'uploader': user.get('username'),
            'uploader_id': user.get('user_id'),
            'age_limit': 18 if video.get('nsfw') is True else 0,
            'timestamp': parse_iso8601(video.get('date_created'), ' '),
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('view_count')),
            'like_count': int_or_none(video.get('likes_count')),
            'comment_count': int_or_none(video.get('comment_count')),
            'formats': formats,
        }
226
227
class VidmeListBaseIE(InfoExtractor):
    """Shared paging logic for vid.me per-user video lists.

    Subclasses supply ``_API_ITEM`` (the path segment after ``/videos/``)
    and ``_TITLE`` (the suffix used in the playlist title).
    """

    # Max possible limit according to https://docs.vid.me/#api-Videos-List
    _LIMIT = 100

    def _entries(self, user_id, user_name):
        """Yield a url_result for every video in the user's list, page by page."""
        for page_num in itertools.count(1):
            offset = (page_num - 1) * self._LIMIT
            page = self._download_json(
                'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
                % (self._API_ITEM, user_id, self._LIMIT, offset),
                user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))

            videos = page.get('videos') or []
            if not videos:
                break

            for video in videos:
                video_url = video.get('full_url') or video.get('embed_url')
                if video_url:
                    yield self.url_result(video_url, VidmeIE.ie_key())

            # `or {}` rather than a .get() default: the default is only used
            # when the key is absent, not when the API sends "page": null.
            paging = page.get('page') or {}
            total = int_or_none(paging.get('total'))
            # Stop once the items requested so far cover the reported total;
            # without a usable total, paging ends on the first empty page.
            if total and self._LIMIT * page_num >= total:
                break

    def _real_extract(self, url):
        """Resolve the username in ``url`` to a user id and return the playlist."""
        user_name = self._match_id(url)

        user_id = self._download_json(
            'https://api.vid.me/userByUsername?username=%s' % user_name,
            user_name)['user']['user_id']

        return self.playlist_result(
            self._entries(user_id, user_name), user_id,
            '%s - %s' % (user_name, self._TITLE))
262
263
class VidmeUserIE(VidmeListBaseIE):
    # Playlist of a vid.me user's videos.

    # API path segment and playlist-title suffix for this list type.
    _API_ITEM = 'list'
    _TITLE = 'Videos'

    IE_NAME = 'vidme:user'
    # The id must be at least six characters long and must not be directly
    # followed by '/likes'.
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'

    _TESTS = [
        {
            'url': 'https://vid.me/MasakoX',
            'info_dict': {
                'id': '16112341',
                'title': 'MasakoX - %s' % _TITLE,
            },
            'playlist_mincount': 191,
        },
        {
            'url': 'https://vid.me/unsQuare_netWork',
            'only_matching': True,
        },
    ]
280
281
class VidmeUserLikesIE(VidmeListBaseIE):
    # Playlist of the videos a vid.me user has liked.

    # API path segment and playlist-title suffix for this list type.
    _API_ITEM = 'likes'
    _TITLE = 'Likes'

    IE_NAME = 'vidme:user:likes'
    # Same id shape as a user URL, but the id must be followed by '/likes'.
    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'

    _TESTS = [
        {
            'url': 'https://vid.me/ErinAlexis/likes',
            'info_dict': {
                'id': '6483530',
                'title': 'ErinAlexis - %s' % _TITLE,
            },
            'playlist_mincount': 415,
        },
        {
            'url': 'https://vid.me/Kaleidoscope-Ish/likes',
            'only_matching': True,
        },
    ]