X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/feb5020b37d7d3ba4005a8bac6f4efece4ce4b8c..b13d4a493050b321e8e726718779a56c4899c51e:/youtube_dl/extractor/tutv.py diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index fcaa6ac..822372e 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,41 +1,35 @@ +from __future__ import unicode_literals + import base64 -import re from .common import InfoExtractor -from ..utils import ( - compat_parse_qs, -) +from ..compat import compat_parse_qs + class TutvIE(InfoExtractor): - _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)' _TEST = { - u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc', - u'file': u'2742556.flv', - u'md5': u'5eb766671f69b82e528dc1e7769c5cb2', - u'info_dict': { - u"title": u"Noah en pabellon cuahutemoc" - } + 'url': 'http://tu.tv/videos/robots-futbolistas', + 'md5': '0cd9e28ad270488911b0d2a72323395d', + 'info_dict': { + 'id': '2973058', + 'ext': 'mp4', + 'title': 'Robots futbolistas', + }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'', webpage, u'title') - internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID') - data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id) - data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info') - data = compat_parse_qs(data_content) - video_url = base64.b64decode(data['kpt'][0]).decode('utf-8') - ext = video_url.partition(u'?')[0].rpartition(u'.')[2] + internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') + + data_content = self._download_webpage( + 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') + video_url = 
base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8') - info = { + return { 'id': internal_id, 'url': video_url, - 'ext': ext, - 'title': title, + 'title': self._og_search_title(webpage), } - return [info]