Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/viki.py

   1 from __future__ import unicode_literals
   2
   3 import time
   4 import hmac
   5 import hashlib
   6 import itertools
   7
   8 from ..utils import (
   9     ExtractorError,
  10     int_or_none,
  11     parse_age_limit,
  12     parse_iso8601,
  13 )
  14 from .common import InfoExtractor
  15
  16
  17 class VikiBaseIE(InfoExtractor):
  18     _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
  19     _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
  20     _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
  21
  22     _APP = '65535a'
  23     _APP_VERSION = '2.2.5.1428709186'
  24     _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
  25
  26     def _prepare_call(self, path, timestamp=None):
  27         path += '?' if '?' not in path else '&'
  28         if not timestamp:
  29             timestamp = int(time.time())
  30         query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
  31         sig = hmac.new(
  32             self._APP_SECRET.encode('ascii'),
  33             query.encode('ascii'),
  34             hashlib.sha1
  35         ).hexdigest()
  36         return self._API_URL_TEMPLATE % (query, sig)
  37
  38     def _call_api(self, path, video_id, note, timestamp=None):
  39         resp = self._download_json(
  40             self._prepare_call(path, timestamp), video_id, note)
  41
  42         error = resp.get('error')
  43         if error:
  44             if error == 'invalid timestamp':
  45                 resp = self._download_json(
  46                     self._prepare_call(path, int(resp['current_timestamp'])),
  47                     video_id, '%s (retry)' % note)
  48                 error = resp.get('error')
  49             if error:
  50                 self._raise_error(resp['error'])
  51
  52         return resp
  53
  54     def _raise_error(self, error):
  55         raise ExtractorError(
  56             '%s returned error: %s' % (self.IE_NAME, error),
  57             expected=True)
  58
  59
  60 class VikiIE(VikiBaseIE):
  61     IE_NAME = 'viki'
  62     _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
  63     _TESTS = [{
  64         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
  65         'info_dict': {
  66             'id': '1023585v',
  67             'ext': 'mp4',
  68             'title': 'Heirs Episode 14',
  69             'uploader': 'SBS',
  70             'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
  71             'upload_date': '20131121',
  72             'age_limit': 13,
  73         },
  74         'skip': 'Blocked in the US',
  75     }, {
  76         # clip
  77         'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
  78         'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
  79         'info_dict': {
  80             'id': '1067139v',
  81             'ext': 'mp4',
  82             'title': "'The Avengers: Age of Ultron' Press Conference",
  83             'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
  84             'duration': 352,
  85             'timestamp': 1430380829,
  86             'upload_date': '20150430',
  87             'uploader': 'Arirang TV',
  88             'like_count': int,
  89             'age_limit': 0,
  90         }
  91     }, {
  92         'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
  93         'info_dict': {
  94             'id': '1048879v',
  95             'ext': 'mp4',
  96             'title': 'Ankhon Dekhi',
  97             'duration': 6512,
  98             'timestamp': 1408532356,
  99             'upload_date': '20140820',
 100             'uploader': 'Spuul',
 101             'like_count': int,
 102             'age_limit': 13,
 103         },
 104         'params': {
 105             # m3u8 download
 106             'skip_download': True,
 107         }
 108     }, {
 109         # episode
 110         'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
 111         'md5': '190f3ef426005ba3a080a63325955bc3',
 112         'info_dict': {
 113             'id': '44699v',
 114             'ext': 'mp4',
 115             'title': 'Boys Over Flowers - Episode 1',
 116             'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2',
 117             'duration': 4155,
 118             'timestamp': 1270496524,
 119             'upload_date': '20100405',
 120             'uploader': 'group8',
 121             'like_count': int,
 122             'age_limit': 13,
 123         }
 124     }, {
 125         # youtube external
 126         'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
 127         'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
 128         'info_dict': {
 129             'id': '50562v',
 130             'ext': 'mp4',
 131             'title': 'Poor Nastya [COMPLETE] - Episode 1',
 132             'description': '',
 133             'duration': 607,
 134             'timestamp': 1274949505,
 135             'upload_date': '20101213',
 136             'uploader': 'ad14065n',
 137             'uploader_id': 'ad14065n',
 138             'like_count': int,
 139             'age_limit': 13,
 140         }
 141     }, {
 142         'url': 'http://www.viki.com/player/44699v',
 143         'only_matching': True,
 144     }]
 145
 146     def _real_extract(self, url):
 147         video_id = self._match_id(url)
 148
 149         video = self._call_api(
 150             'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
 151
 152         title = None
 153         titles = video.get('titles')
 154         if titles:
 155             title = titles.get('en') or titles[titles.keys()[0]]
 156         if not title:
 157             title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
 158             container_titles = video.get('container', {}).get('titles')
 159             if container_titles:
 160                 container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]]
 161                 title = '%s - %s' % (container_title, title)
 162
 163         descriptions = video.get('descriptions')
 164         description = descriptions.get('en') or descriptions[titles.keys()[0]] if descriptions else None
 165
 166         duration = int_or_none(video.get('duration'))
 167         timestamp = parse_iso8601(video.get('created_at'))
 168         uploader = video.get('author')
 169         like_count = int_or_none(video.get('likes', {}).get('count'))
 170         age_limit = parse_age_limit(video.get('rating'))
 171
 172         thumbnails = []
 173         for thumbnail_id, thumbnail in video.get('images', {}).items():
 174             thumbnails.append({
 175                 'id': thumbnail_id,
 176                 'url': thumbnail.get('url'),
 177             })
 178
 179         subtitles = {}
 180         for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
 181             subtitles[subtitle_lang] = [{
 182                 'ext': subtitles_format,
 183                 'url': self._prepare_call(
 184                     'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
 185             } for subtitles_format in ('srt', 'vtt')]
 186
 187         result = {
 188             'id': video_id,
 189             'title': title,
 190             'description': description,
 191             'duration': duration,
 192             'timestamp': timestamp,
 193             'uploader': uploader,
 194             'like_count': like_count,
 195             'age_limit': age_limit,
 196             'thumbnails': thumbnails,
 197             'subtitles': subtitles,
 198         }
 199
 200         streams = self._call_api(
 201             'videos/%s/streams.json' % video_id, video_id,
 202             'Downloading video streams JSON')
 203
 204         if 'external' in streams:
 205             result.update({
 206                 '_type': 'url_transparent',
 207                 'url': streams['external']['url'],
 208             })
 209             return result
 210
 211         formats = []
 212         for format_id, stream_dict in streams.items():
 213             height = self._search_regex(
 214                 r'^(\d+)[pP]$', format_id, 'height', default=None)
 215             for protocol, format_dict in stream_dict.items():
 216                 if format_id == 'm3u8':
 217                     formats = self._extract_m3u8_formats(
 218                         format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
 219                 else:
 220                     formats.append({
 221                         'url': format_dict['url'],
 222                         'format_id': '%s-%s' % (format_id, protocol),
 223                         'height': height,
 224                     })
 225         self._sort_formats(formats)
 226
 227         result['formats'] = formats
 228         return result
 229
 230
 231 class VikiChannelIE(VikiBaseIE):
 232     IE_NAME = 'viki:channel'
 233     _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
 234     _TESTS = [{
 235         'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
 236         'info_dict': {
 237             'id': '50c',
 238             'title': 'Boys Over Flowers',
 239             'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
 240         },
 241         'playlist_count': 70,
 242     }, {
 243         'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
 244         'info_dict': {
 245             'id': '1354c',
 246             'title': 'Poor Nastya [COMPLETE]',
 247             'description': 'md5:05bf5471385aa8b21c18ad450e350525',
 248         },
 249         'playlist_count': 127,
 250     }, {
 251         'url': 'http://www.viki.com/news/24569c-showbiz-korea',
 252         'only_matching': True,
 253     }, {
 254         'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
 255         'only_matching': True,
 256     }, {
 257         'url': 'http://www.viki.com/artists/2141c-shinee',
 258         'only_matching': True,
 259     }]
 260
 261     _PER_PAGE = 25
 262
 263     def _real_extract(self, url):
 264         channel_id = self._match_id(url)
 265
 266         channel = self._call_api(
 267             'containers/%s.json' % channel_id, channel_id,
 268             'Downloading channel JSON')
 269
 270         titles = channel['titles']
 271         title = titles.get('en') or titles[titles.keys()[0]]
 272
 273         descriptions = channel['descriptions']
 274         description = descriptions.get('en') or descriptions[descriptions.keys()[0]]
 275
 276         entries = []
 277         for video_type in ('episodes', 'clips', 'movies'):
 278             for page_num in itertools.count(1):
 279                 page = self._call_api(
 280                     'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
 281                     % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
 282                     'Downloading %s JSON page #%d' % (video_type, page_num))
 283                 for video in page['response']:
 284                     video_id = video['id']
 285                     entries.append(self.url_result(
 286                         'http://www.viki.com/videos/%s' % video_id, 'Viki'))
 287                 if not page['pagination']['next']:
 288                     break
 289
 290         return self.playlist_result(entries, channel_id, title, description)