X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/feb5020b37d7d3ba4005a8bac6f4efece4ce4b8c..a497d0e55172891fd4925626374a7afdd811e00f:/youtube_dl/extractor/infoq.py?ds=sidebyside diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index c79c589..ed32373 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -1,62 +1,55 @@ +from __future__ import unicode_literals + import base64 import re from .common import InfoExtractor from ..utils import ( compat_urllib_parse, - - ExtractorError, ) class InfoQIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' + _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P[^/]+)$' _TEST = { - u"name": u"InfoQ", - u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", - u"file": u"12-jan-pythonthings.mp4", - u"info_dict": { - u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", - u"title": u"A Few of My Favorite [Python] Things" + "name": "InfoQ", + "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", + "file": "12-jan-pythonthings.mp4", + "info_dict": { + "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", + "title": "A Few of My Favorite [Python] Things", + }, + "params": { + "skip_download": True, }, - u"params": { - u"skip_download": True - } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id=url) - self.report_extraction(url) + webpage = self._download_webpage(url, video_id) # Extract video URL - mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage) - if mobj is None: - raise ExtractorError(u'Unable to extract video url') - real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8')) + encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id') + real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id # Extract title video_title = self._search_regex(r'contentTitle = "(.*?)";', - webpage, u'title') + webpage, 'title') # Extract description video_description = self._html_search_regex(r'', - webpage, u'description', fatal=False) + webpage, 'description', fatal=False) video_filename = video_url.split('/')[-1] video_id, extension = video_filename.split('.') - info = { + return { 'id': video_id, 'url': video_url, - 'uploader': None, - 'upload_date': None, 'title': video_title, - 'ext': extension, # Extension is always(?) mp4, but seems to be flv - 'thumbnail': None, + 'ext': extension, # Extension is always(?) mp4, but seems to be flv 'description': video_description, } - - return [info] \ No newline at end of file