X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/af478477605bdf3f5d57562035885cfee905f379..ced7488f6d3a519b2c1b1cbd31048743fb8285bd:/youtube_dl/extractor/vesti.py diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py index f51d4dc..cb64ae0 100644 --- a/youtube_dl/extractor/vesti.py +++ b/youtube_dl/extractor/vesti.py @@ -4,16 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none -) +from ..utils import ExtractorError +from .rutv import RUTVIE class VestiIE(InfoExtractor): - IE_NAME = 'vesti' IE_DESC = 'Вести.Ru' - _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P.+)' + _VALID_URL = r'https?://(?:.+?\.)?vesti\.ru/(?P.+)' _TESTS = [ { @@ -30,6 +27,20 @@ class VestiIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://www.vesti.ru/doc.html?id=1349233', + 'info_dict': { + 'id': '773865', + 'ext': 'mp4', + 'title': 'Участники митинга штурмуют Донецкую областную администрацию', + 'description': 'md5:1a160e98b3195379b4c849f2f4958009', + 'duration': 210, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { 'url': 'http://www.vesti.ru/only_video.html?vid=576180', 'info_dict': { @@ -44,6 +55,20 @@ class VestiIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://hitech.vesti.ru/news/view/id/4000', + 'info_dict': { + 'id': '766888', + 'ext': 'mp4', + 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', + 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', + 'duration': 279, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { 'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403', 'info_dict': { @@ -57,7 +82,7 @@ class VestiIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'skip': 'Blocked outside Russia' + 'skip': 'Blocked outside Russia', }, { 'url': 'http://sochi2014.vesti.ru/live/play/live_id/301', @@ -72,7 +97,7 @@ class VestiIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Translation has finished' - } + }, ] def _real_extract(self, url): @@ -81,90 +106,16 @@ class VestiIE(InfoExtractor): page = self._download_webpage(url, video_id, 'Downloading page') - mobj = re.search(r'', page) + mobj = re.search( + r']+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P\d+)', + page) if mobj: - video_type = 'video' - video_id = mobj.group('id') - else: - mobj = re.search( - r'[^/]+)/id/(?P\d+)[^"]*".*?>', page) - - if not mobj: - raise ExtractorError('No media found') - - video_type = mobj.group('type') video_id = mobj.group('id') + page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id, + 'Downloading video page') - json_data = self._download_json( - 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), - video_id, 'Downloading JSON') - - if json_data['errors']: - raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True) - - playlist = json_data['data']['playlist'] - medialist = playlist['medialist'] - media = medialist[0] - - if media['errors']: - raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True) - - view_count = playlist.get('count_views') - priority_transport = playlist['priority_transport'] - - thumbnail = media['picture'] - width = media['width'] - height = media['height'] - description = media['anons'] - title = media['title'] - duration = int_or_none(media.get('duration')) - - formats = [] - - for transport, links in media['sources'].items(): - for quality, url in links.items(): - if transport == 'rtmp': - mobj = re.search(r'^(?Prtmp://[^/]+/(?P.+))/(?P.+)$', url) - if not mobj: - continue - fmt = { - 'url': mobj.group('url'), - 'play_path': mobj.group('playpath'), - 'app': mobj.group('app'), - 'page_url': 'http://player.rutv.ru', - 'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22', - 'rtmp_live': True, - 'ext': 'flv', - 'vbr': int(quality), - } - elif transport == 'm3u8': - fmt = { - 'url': url, - 'ext': 'mp4', - } - else: - fmt = { - 'url': url - } - fmt.update({ - 'width': width, - 'height': height, - 'format_id': '%s-%s' % (transport, quality), - 'preference': -1 if priority_transport == transport else -2, - }) - formats.append(fmt) - - if not formats: - raise ExtractorError('No media links available for %s' % video_id) - - self._sort_formats(formats) + rutv_url = RUTVIE._extract_url(page) + if rutv_url: + return self.url_result(rutv_url, 'RUTV') - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'view_count': view_count, - 'duration': duration, - 'formats': formats, - } \ No newline at end of file + raise ExtractorError('No video found', expected=True)