X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/87a0165ca7e39af4dacb7ec637063b2cd35ae40b..0f0afa2a182178f8c70d50daf99f864372fe71f3:/youtube_dl/extractor/eagleplatform.py?ds=inline diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index c2f593e..36fef07 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -4,13 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, -) +from ..compat import compat_HTTPError from ..utils import ( ExtractorError, int_or_none, + unsmuggle_url, + url_or_none, ) @@ -31,7 +30,7 @@ class EaglePlatformIE(InfoExtractor): 'ext': 'mp4', 'title': 'Навальный вышел на свободу', 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 87, 'view_count': int, 'age_limit': 0, @@ -45,11 +44,15 @@ class EaglePlatformIE(InfoExtractor): 'id': '12820', 'ext': 'mp4', 'title': "'O Sole Mio", - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 216, 'view_count': int, }, 'skip': 'Georestricted', + }, { + # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) + 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', + 'only_matching': True, }] @staticmethod @@ -60,16 +63,40 @@ class EaglePlatformIE(InfoExtractor): webpage) if mobj is not None: return mobj.group('url') - # Basic usage embedding (see http://dultonmedia.github.io/eplayer/) + PLAYER_JS_RE = r''' + ]+ + src=(?P["\'])(?:https?:)?//(?P(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) + .+? + ''' + # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) mobj = re.search( r'''(?xs) - ]+ - src=(?P["\'])(?:https?:)?//(?P.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1) - .+? + %s ]+ - class=(?P["\'])eagleplayer(?P=q2)[^>]+ + class=(?P["\'])eagleplayer(?P=qclass)[^>]+ data-id=["\'](?P\d+) - ''', webpage) + ''' % PLAYER_JS_RE, webpage) + if mobj is not None: + return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() + # Generalization of "Javascript code usage", "Combined usage" and + # "Usage without attaching to DOM" embeddings (see + # http://dultonmedia.github.io/eplayer/) + mobj = re.search( + r'''(?xs) + %s + + ''' % PLAYER_JS_RE, webpage) if mobj is not None: return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() @@ -79,9 +106,10 @@ class EaglePlatformIE(InfoExtractor): if status != 200: raise ExtractorError(' '.join(response['errors']), expected=True) - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs): + def _download_json(self, url_or_request, video_id, *args, **kwargs): try: - response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) + response = super(EaglePlatformIE, self)._download_json( + url_or_request, video_id, *args, **kwargs) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError): response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) @@ -93,11 +121,24 @@ class EaglePlatformIE(InfoExtractor): return self._download_json(url_or_request, video_id, note)['data'][0] def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + mobj = re.match(self._VALID_URL, url) host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') + headers = {} + query = { + 'id': video_id, + } + + referrer = smuggled_data.get('referrer') + if referrer: + headers['Referer'] = referrer + query['referrer'] = referrer + player_data = self._download_json( - 'http://%s/api/player_data?id=%s' % (host, video_id), video_id) + 'http://%s/api/player_data' % host, video_id, + headers=headers, query=query) media = player_data['data']['playlist']['viewports'][0]['medialist'][0] @@ -134,7 +175,7 @@ class EaglePlatformIE(InfoExtractor): video_id, 'Downloading mp4 JSON', fatal=False) if mp4_data: for format_id, format_url in mp4_data.get('data', {}).items(): - if not isinstance(format_url, compat_str): + if not url_or_none(format_url): continue height = int_or_none(format_id) if height is not None and m3u8_formats_dict.get(height):