X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/feb5020b37d7d3ba4005a8bac6f4efece4ce4b8c..fb7740590fb6631cf8e5ae3ba4e7a81b0623cba9:/youtube_dl/extractor/tutv.py?ds=inline diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index fcaa6ac..362318b 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,41 +1,36 @@ -import base64 -import re +from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_b64decode, compat_parse_qs, ) + class TutvIE(InfoExtractor): - _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P[^/?]+)' _TEST = { - u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc', - u'file': u'2742556.flv', - u'md5': u'5eb766671f69b82e528dc1e7769c5cb2', - u'info_dict': { - u"title": u"Noah en pabellon cuahutemoc" - } + 'url': 'http://tu.tv/videos/robots-futbolistas', + 'md5': '0cd9e28ad270488911b0d2a72323395d', + 'info_dict': { + 'id': '2973058', + 'ext': 'mp4', + 'title': 'Robots futbolistas', + }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'', webpage, u'title') - internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID') - data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id) - data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info') - data = compat_parse_qs(data_content) - video_url = base64.b64decode(data['kpt'][0]).decode('utf-8') - ext = video_url.partition(u'?')[0].rpartition(u'.')[2] + internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') + + data_content = self._download_webpage( + 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') + video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8') - info = { + return { 'id': internal_id, 'url': video_url, - 'ext': ext, - 'title': title, + 'title': self._og_search_title(webpage), } - return [info]