X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/0865c28fb29a6481cd837cf8c1ef0cd134c6ef8e..1d04e265122c7ed6edf8f3c75a0619931b9368b9:/youtube_dl/extractor/laola1tv.py diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index b459559..1f91ba0 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -1,86 +1,217 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals -import random -import re +import json from .common import InfoExtractor from ..utils import ( ExtractorError, + unified_strdate, + urlencode_postdata, + xpath_element, xpath_text, + update_url_query, + js_to_json, ) -class Laola1TvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P[a-z]+)-(?P[a-z]+)/.*?/(?P[0-9]+)\.html' - _TEST = { +class Laola1TvEmbedIE(InfoExtractor): + IE_NAME = 'laola1tv:embed' + _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P\d+)' + _TESTS = [{ + # flashvars.premium = "false"; + 'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024', + 'info_dict': { + 'id': '708065', + 'ext': 'mp4', + 'title': 'MA Long CHN - FAN Zhendong CHN', + 'uploader': 'ITTF - International Table Tennis Federation', + 'upload_date': '20161211', + }, + }] + + def _extract_token_url(self, stream_access_url, video_id, data): + return self._download_json( + stream_access_url, video_id, headers={ + 'Content-Type': 'application/json', + }, data=json.dumps(data).encode())['data']['stream-access'][0] + + def _extract_formats(self, token_url, video_id): + token_doc = self._download_xml( + token_url, video_id, 'Downloading token', + headers=self.geo_verification_headers()) + + token_attrib = xpath_element(token_doc, './/token').attrib + + if token_attrib['status'] != '0': + raise ExtractorError( + 'Token error: %s' % token_attrib['comment'], expected=True) + + formats = self._extract_akamai_formats( + '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']), + video_id) + self._sort_formats(formats) + return formats + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + flash_vars = self._search_regex( + r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars') + + def get_flashvar(x, *args, **kwargs): + flash_var = self._search_regex( + r'%s\s*:\s*"([^"]+)"' % x, + flash_vars, x, default=None) + if not flash_var: + flash_var = self._search_regex([ + r'flashvars\.%s\s*=\s*"([^"]+)"' % x, + r'%s\s*=\s*"([^"]+)"' % x], + webpage, x, *args, **kwargs) + return flash_var + + hd_doc = self._download_xml( + 'http://www.laola1.tv/server/hd_video.php', video_id, query={ + 'play': get_flashvar('streamid'), + 'partner': get_flashvar('partnerid'), + 'portal': get_flashvar('portalid'), + 'lang': get_flashvar('sprache'), + 'v5ident': '', + }) + + _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) + title = _v('title', fatal=True) + + token_url = None + premium = get_flashvar('premium', default=None) + if premium: + token_url = update_url_query( + _v('url', fatal=True), { + 'timestamp': get_flashvar('timestamp'), + 'auth': get_flashvar('auth'), + }) + else: + data_abo = urlencode_postdata( + dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) + stream_access_url = update_url_query( + 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', { + 'videoId': _v('id'), + 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), + 'label': _v('label'), + 'area': _v('area'), + }) + token_url = self._extract_token_url(stream_access_url, video_id, data_abo) + + formats = self._extract_formats(token_url, video_id) + + categories_str = _v('meta_sports') + categories = categories_str.split(',') if categories_str else [] + is_live = _v('islive') == 'true' + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'upload_date': unified_strdate(_v('time_date')), + 'uploader': _v('meta_organisation'), + 'categories': categories, + 'is_live': is_live, + 'formats': formats, + } + + +class Laola1TvIE(Laola1TvEmbedIE): + IE_NAME = 'laola1tv' + _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P[^/?#&]+)' + _TESTS = [{ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'info_dict': { 'id': '227883', - 'ext': 'mp4', + 'display_id': 'straubing-tigers-koelner-haie', + 'ext': 'flv', 'title': 'Straubing Tigers - Kölner Haie', + 'upload_date': '20140912', + 'is_live': False, 'categories': ['Eishockey'], + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie', + 'info_dict': { + 'id': '464602', + 'display_id': 'straubing-tigers-koelner-haie', + 'ext': 'flv', + 'title': 'Straubing Tigers - Kölner Haie', + 'upload_date': '20160129', 'is_live': False, + 'categories': ['Eishockey'], }, 'params': { 'skip_download': True, - } - } + }, + }, { + 'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde', + 'info_dict': { + 'id': '487850', + 'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde', + 'ext': 'flv', + 'title': 'Belogorie BELGOROD - TRENTINO Diatec', + 'upload_date': '20160322', + 'uploader': 'CEV - Europäischer Volleyball Verband', + 'is_live': True, + 'categories': ['Volleyball'], + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'This live stream has already finished.', + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - lang = mobj.group('lang') - portal = mobj.group('portal') - - webpage = self._download_webpage(url, video_id) - iframe_url = self._search_regex( - r']*?class="main_tv_player"[^>]*?src="([^"]+)"', - webpage, 'iframe URL') + display_id = self._match_id(url) - iframe = self._download_webpage( - iframe_url, video_id, note='Downloading iframe') - flashvars_m = re.findall( - r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe) - flashvars = dict((m[0], m[1]) for m in flashvars_m) + webpage = self._download_webpage(url, display_id) - partner_id = self._search_regex( - r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id') + if 'Dieser Livestream ist bereits beendet.' in webpage: + raise ExtractorError('This live stream has already finished.', expected=True) - xml_url = ('http://www.laola1.tv/server/hd_video.php?' + - 'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % ( - video_id, partner_id, portal, lang)) - hd_doc = self._download_xml(xml_url, video_id) + conf = self._parse_json(self._search_regex( + r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), + display_id, js_to_json) - title = xpath_text(hd_doc, './/video/title', fatal=True) - flash_url = xpath_text(hd_doc, './/video/url', fatal=True) - uploader = xpath_text(hd_doc, './/video/meta_organistation') - is_live = xpath_text(hd_doc, './/video/islive') == 'true' + video_id = conf['videoid'] - categories = xpath_text(hd_doc, './/video/meta_sports') - if categories: - categories = categories.split(',') + config = self._download_json(conf['configUrl'], video_id, query={ + 'videoid': video_id, + 'partnerid': conf['partnerid'], + 'language': conf.get('language', ''), + 'portal': conf.get('portalid', ''), + }) + error = config.get('error') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) - ident = random.randint(10000000, 99999999) - token_url = '%s&ident=%s&klub=0&unikey=0×tamp=%s&auth=%s' % ( - flash_url, ident, flashvars['timestamp'], flashvars['auth']) + video_data = config['video'] + title = video_data['title'] + is_live = video_data.get('isLivestream') and video_data.get('isLive') + meta = video_data.get('metaInformation') + sports = meta.get('sports') + categories = sports.split(',') if sports else [] - token_doc = self._download_xml( - token_url, video_id, note='Downloading token') - token_attrib = token_doc.find('.//token').attrib - if token_attrib.get('auth') in ('blocked', 'restricted'): - raise ExtractorError( - 'Token error: %s' % token_attrib.get('comment'), expected=True) + token_url = self._extract_token_url( + video_data['streamAccess'], video_id, + video_data['abo']['required']) - video_url = '%s?hdnea=%s&hdcore=3.2.0' % ( - token_attrib['url'], token_attrib['auth']) + formats = self._extract_formats(token_url, video_id) return { 'id': video_id, - 'is_live': is_live, - 'title': title, - 'url': video_url, - 'uploader': uploader, + 'display_id': display_id, + 'title': self._live_title(title) if is_live else title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('image'), 'categories': categories, - 'ext': 'mp4', + 'formats': formats, + 'is_live': is_live, }