Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vidme.py

   1 from __future__ import unicode_literals
   2
   3 import itertools
   4
   5 from .common import InfoExtractor
   6 from ..compat import compat_HTTPError
   7 from ..utils import (
   8     ExtractorError,
   9     int_or_none,
  10     float_or_none,
  11     parse_iso8601,
  12 )
  13
  14
  15 class VidmeIE(InfoExtractor):
  16     IE_NAME = 'vidme'
  17     _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
  18     _TESTS = [{
  19         'url': 'https://vid.me/QNB',
  20         'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
  21         'info_dict': {
  22             'id': 'QNB',
  23             'ext': 'mp4',
  24             'title': 'Fishing for piranha - the easy way',
  25             'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
  26             'thumbnail': 're:^https?://.*\.jpg',
  27             'timestamp': 1406313244,
  28             'upload_date': '20140725',
  29             'age_limit': 0,
  30             'duration': 119.92,
  31             'view_count': int,
  32             'like_count': int,
  33             'comment_count': int,
  34         },
  35     }, {
  36         'url': 'https://vid.me/Gc6M',
  37         'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
  38         'info_dict': {
  39             'id': 'Gc6M',
  40             'ext': 'mp4',
  41             'title': 'O Mere Dil ke chain - Arnav and Khushi VM',
  42             'thumbnail': 're:^https?://.*\.jpg',
  43             'timestamp': 1441211642,
  44             'upload_date': '20150902',
  45             'uploader': 'SunshineM',
  46             'uploader_id': '3552827',
  47             'age_limit': 0,
  48             'duration': 223.72,
  49             'view_count': int,
  50             'like_count': int,
  51             'comment_count': int,
  52         },
  53         'params': {
  54             'skip_download': True,
  55         },
  56     }, {
  57         # tests uploader field
  58         'url': 'https://vid.me/4Iib',
  59         'info_dict': {
  60             'id': '4Iib',
  61             'ext': 'mp4',
  62             'title': 'The Carver',
  63             'description': 'md5:e9c24870018ae8113be936645b93ba3c',
  64             'thumbnail': 're:^https?://.*\.jpg',
  65             'timestamp': 1433203629,
  66             'upload_date': '20150602',
  67             'uploader': 'Thomas',
  68             'uploader_id': '109747',
  69             'age_limit': 0,
  70             'duration': 97.859999999999999,
  71             'view_count': int,
  72             'like_count': int,
  73             'comment_count': int,
  74         },
  75         'params': {
  76             'skip_download': True,
  77         },
  78     }, {
  79         # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching
  80         'url': 'https://vid.me/e/Wmur',
  81         'info_dict': {
  82             'id': 'Wmur',
  83             'ext': 'mp4',
  84             'title': 'naked smoking & stretching',
  85             'thumbnail': 're:^https?://.*\.jpg',
  86             'timestamp': 1430931613,
  87             'upload_date': '20150506',
  88             'uploader': 'naked-yogi',
  89             'uploader_id': '1638622',
  90             'age_limit': 18,
  91             'duration': 653.26999999999998,
  92             'view_count': int,
  93             'like_count': int,
  94             'comment_count': int,
  95         },
  96         'params': {
  97             'skip_download': True,
  98         },
  99     }, {
 100         # nsfw, user-disabled
 101         'url': 'https://vid.me/dzGJ',
 102         'only_matching': True,
 103     }, {
 104         # suspended
 105         'url': 'https://vid.me/Ox3G',
 106         'only_matching': True,
 107     }, {
 108         # deleted
 109         'url': 'https://vid.me/KTPm',
 110         'only_matching': True,
 111     }, {
 112         # no formats in the API response
 113         'url': 'https://vid.me/e5g',
 114         'info_dict': {
 115             'id': 'e5g',
 116             'ext': 'mp4',
 117             'title': 'Video upload (e5g)',
 118             'thumbnail': 're:^https?://.*\.jpg',
 119             'timestamp': 1401480195,
 120             'upload_date': '20140530',
 121             'uploader': None,
 122             'uploader_id': None,
 123             'age_limit': 0,
 124             'duration': 483,
 125             'view_count': int,
 126             'like_count': int,
 127             'comment_count': int,
 128         },
 129         'params': {
 130             'skip_download': True,
 131         },
 132     }]
 133
 134     def _real_extract(self, url):
 135         video_id = self._match_id(url)
 136
 137         try:
 138             response = self._download_json(
 139                 'https://api.vid.me/videoByUrl/%s' % video_id, video_id)
 140         except ExtractorError as e:
 141             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
 142                 response = self._parse_json(e.cause.read(), video_id)
 143             else:
 144                 raise
 145
 146         error = response.get('error')
 147         if error:
 148             raise ExtractorError(
 149                 '%s returned error: %s' % (self.IE_NAME, error), expected=True)
 150
 151         video = response['video']
 152
 153         if video.get('state') == 'deleted':
 154             raise ExtractorError(
 155                 'Vidme said: Sorry, this video has been deleted.',
 156                 expected=True)
 157
 158         if video.get('state') in ('user-disabled', 'suspended'):
 159             raise ExtractorError(
 160                 'Vidme said: This video has been suspended either due to a copyright claim, '
 161                 'or for violating the terms of use.',
 162                 expected=True)
 163
 164         formats = [{
 165             'format_id': f.get('type'),
 166             'url': f['uri'],
 167             'width': int_or_none(f.get('width')),
 168             'height': int_or_none(f.get('height')),
 169             'preference': 0 if f.get('type', '').endswith('clip') else 1,
 170         } for f in video.get('formats', []) if f.get('uri')]
 171
 172         if not formats and video.get('complete_url'):
 173             formats.append({
 174                 'url': video.get('complete_url'),
 175                 'width': int_or_none(video.get('width')),
 176                 'height': int_or_none(video.get('height')),
 177             })
 178
 179         self._sort_formats(formats)
 180
 181         title = video['title']
 182         description = video.get('description')
 183         thumbnail = video.get('thumbnail_url')
 184         timestamp = parse_iso8601(video.get('date_created'), ' ')
 185         uploader = video.get('user', {}).get('username')
 186         uploader_id = video.get('user', {}).get('user_id')
 187         age_limit = 18 if video.get('nsfw') is True else 0
 188         duration = float_or_none(video.get('duration'))
 189         view_count = int_or_none(video.get('view_count'))
 190         like_count = int_or_none(video.get('likes_count'))
 191         comment_count = int_or_none(video.get('comment_count'))
 192
 193         return {
 194             'id': video_id,
 195             'title': title or 'Video upload (%s)' % video_id,
 196             'description': description,
 197             'thumbnail': thumbnail,
 198             'uploader': uploader,
 199             'uploader_id': uploader_id,
 200             'age_limit': age_limit,
 201             'timestamp': timestamp,
 202             'duration': duration,
 203             'view_count': view_count,
 204             'like_count': like_count,
 205             'comment_count': comment_count,
 206             'formats': formats,
 207         }
 208
 209
 210 class VidmeListBaseIE(InfoExtractor):
 211     # Max possible limit according to https://docs.vid.me/#api-Videos-List
 212     _LIMIT = 100
 213
 214     def _entries(self, user_id, user_name):
 215         for page_num in itertools.count(1):
 216             page = self._download_json(
 217                 'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
 218                 % (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
 219                 user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
 220
 221             videos = page.get('videos', [])
 222             if not videos:
 223                 break
 224
 225             for video in videos:
 226                 video_url = video.get('full_url') or video.get('embed_url')
 227                 if video_url:
 228                     yield self.url_result(video_url, VidmeIE.ie_key())
 229
 230             total = int_or_none(page.get('page', {}).get('total'))
 231             if total and self._LIMIT * page_num >= total:
 232                 break
 233
 234     def _real_extract(self, url):
 235         user_name = self._match_id(url)
 236
 237         user_id = self._download_json(
 238             'https://api.vid.me/userByUsername?username=%s' % user_name,
 239             user_name)['user']['user_id']
 240
 241         return self.playlist_result(
 242             self._entries(user_id, user_name), user_id,
 243             '%s - %s' % (user_name, self._TITLE))
 244
 245
 246 class VidmeUserIE(VidmeListBaseIE):
 247     IE_NAME = 'vidme:user'
 248     _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
 249     _API_ITEM = 'list'
 250     _TITLE = 'Videos'
 251     _TEST = {
 252         'url': 'https://vid.me/EFARCHIVE',
 253         'info_dict': {
 254             'id': '3834632',
 255             'title': 'EFARCHIVE - %s' % _TITLE,
 256         },
 257         'playlist_mincount': 238,
 258     }
 259
 260
 261 class VidmeUserLikesIE(VidmeListBaseIE):
 262     IE_NAME = 'vidme:user:likes'
 263     _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
 264     _API_ITEM = 'likes'
 265     _TITLE = 'Likes'
 266     _TEST = {
 267         'url': 'https://vid.me/ErinAlexis/likes',
 268         'info_dict': {
 269             'id': '6483530',
 270             'title': 'ErinAlexis - %s' % _TITLE,
 271         },
 272         'playlist_mincount': 415,
 273     }