X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/03342304420e5daeb428ffdcc7bbd2bbfecfa61a..e22397a1c5329e12f3719961c9456e378f94dbb1:/youtube_dl/extractor/ivi.py diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 472d72b..b5a740a 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -1,21 +1,26 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals -import re import json +import re +import sys from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, - sanitized_Request, + qualities, ) class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['RU'] + _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c' + _LIGHT_URL = 'https://api.ivi.ru/light/' _TESTS = [ # Single movie @@ -28,7 +33,7 @@ class IviIE(InfoExtractor): 'title': 'Иван Васильевич меняет профессию', 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f', 'duration': 5498, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', }, @@ -46,55 +51,128 @@ class IviIE(InfoExtractor): 'episode': 'Дело Гольдберга (1 часть)', 'episode_number': 1, 'duration': 2655, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', - } + }, + { + # with MP4-HD720 format + 'url': 'http://www.ivi.ru/watch/146500', + 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e', + 'info_dict': { + 'id': '146500', + 'ext': 'mp4', + 'title': 'Кукла', + 'description': 'md5:ffca9372399976a2d260a407cc74cce6', + 'duration': 5599, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'skip': 'Only works from Russia', + }, + { + 'url': 'https://www.ivi.tv/watch/33560/', + 'only_matching': True, + }, ] # Sorted by quality - _KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] + _KNOWN_FORMATS = ( + 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', + 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080') def _real_extract(self, url): video_id = self._match_id(url) - data = { + data = json.dumps({ 'method': 'da.content.get', 'params': [ video_id, { - 'site': 's183', + 'site': 's%d', 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, 'contentid': video_id } ] - } + }) - request = sanitized_Request( - 'http://api.digitalaccess.ru/api/json/', json.dumps(data)) - video_json = self._download_json( - request, video_id, 'Downloading video JSON') + bundled = hasattr(sys, 'frozen') - if 'error' in video_json: - error = video_json['error'] - if error['origin'] == 'NoRedisValidData': - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - raise ExtractorError( - 'Unable to download video %s: %s' % (video_id, error['message']), - expected=True) + for site in (353, 183): + content_data = (data % site).encode() + if site == 353: + if bundled: + continue + try: + from Cryptodome.Cipher import Blowfish + from Cryptodome.Hash import CMAC + pycryptodomex_found = True + except ImportError: + pycryptodomex_found = False + continue - result = video_json['result'] + timestamp = (self._download_json( + self._LIGHT_URL, video_id, + 'Downloading timestamp JSON', data=json.dumps({ + 'method': 'da.timestamp.get', + 'params': [] + }).encode(), fatal=False) or {}).get('result') + if not timestamp: + continue - formats = [{ - 'url': x['url'], - 'format_id': x['content_format'], - 'preference': self._KNOWN_FORMATS.index(x['content_format']), - } for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS] + query = { + 'ts': timestamp, + 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(), + } + else: + query = {} - self._sort_formats(formats) + video_json = self._download_json( + self._LIGHT_URL, video_id, + 'Downloading video JSON', data=content_data, query=query) + error = video_json.get('error') + if error: + origin = error.get('origin') + message = error.get('message') or error.get('user_message') + extractor_msg = 'Unable to download video %s' + if origin == 'NotAllowedForLocation': + self.raise_geo_restricted(message, self._GEO_COUNTRIES) + elif origin == 'NoRedisValidData': + extractor_msg = 'Video %s does not exist' + elif site == 353: + continue + elif bundled: + raise ExtractorError( + 'This feature does not work from bundled exe. Run youtube-dl from sources.', + expected=True) + elif not pycryptodomex_found: + raise ExtractorError( + 'pycryptodomex not found. Please install it.', + expected=True) + elif message: + extractor_msg += ': ' + message + raise ExtractorError(extractor_msg % video_id, expected=True) + else: + break + + result = video_json['result'] title = result['title'] - duration = int_or_none(result.get('duration')) + quality = qualities(self._KNOWN_FORMATS) + + formats = [] + for f in result.get('files', []): + f_url = f.get('url') + content_format = f.get('content_format') + if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format: + continue + formats.append({ + 'url': f_url, + 'format_id': content_format, + 'quality': quality(content_format), + 'filesize': int_or_none(f.get('size_in_bytes')), + }) + self._sort_formats(formats) + compilation = result.get('compilation') episode = title if compilation else None @@ -115,7 +193,7 @@ class IviIE(InfoExtractor): webpage, 'season number', default=None)) episode_number = int_or_none(self._search_regex( - r']+itemprop="episode"[^>]*>\s*]+itemprop="episodeNumber"[^>]+content="(\d+)', + r'[^>]+itemprop="episode"[^>]*>\s*]+itemprop="episodeNumber"[^>]+content="(\d+)', webpage, 'episode number', default=None)) description = self._og_search_description(webpage, default=None) or self._html_search_meta( @@ -131,7 +209,7 @@ class IviIE(InfoExtractor): 'episode_number': episode_number, 'thumbnails': thumbnails, 'description': description, - 'duration': duration, + 'duration': int_or_none(result.get('duration')), 'formats': formats, } @@ -161,7 +239,7 @@ class IviCompilationIE(InfoExtractor): self.url_result( 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key()) for serie in re.findall( - r']+data-id="\1"' % compilation_id, html)] + r']+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url)