Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/infoq.py
9056742821a2fd99fadd5b1271df7dc6f5127749
[youtubedl] / youtube_dl / extractor / infoq.py
1 import base64
2 import re
3
4 from .common import InfoExtractor
5 from ..utils import (
6 compat_urllib_parse,
7
8 ExtractorError,
9 )
10
11
class InfoQIE(InfoExtractor):
    """Information extractor for infoq.com pages matching `_VALID_URL`."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'

    def _real_extract(self, url):
        """Build the info dictionary for the video embedded in an InfoQ page.

        The page at `url` is downloaded, the base64-encoded `jsclassref`
        value found in it is decoded and URL-unquoted, and the result is
        appended to the fixed RTMPE prefix to form the video URL.  The video
        id and extension are taken from the last path component of that URL.

        Returns a list containing a single dictionary with the keys `id`,
        `url`, `uploader`, `upload_date`, `title`, `ext`, `thumbnail` and
        `description`.

        Raises ExtractorError when the page has no `jsclassref` value or when
        the decoded file name has no extension.
        NOTE(review): `_search_regex` presumably raises as well when the
        title is missing -- confirm against InfoExtractor.
        """
        webpage = self._download_webpage(url, video_id=url)
        self.report_extraction(url)

        # Extract video URL
        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        encoded_id = mobj.group(1).encode('ascii')
        real_id = compat_urllib_parse.unquote(
            base64.b64decode(encoded_id).decode('utf-8'))
        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id

        # Extract title
        video_title = self._search_regex(r'contentTitle = "(.*?)";',
            webpage, u'title')

        # Extract description (a missing description is not fatal)
        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
            webpage, u'description', fatal=False)

        video_filename = video_url.split('/')[-1]
        if '.' not in video_filename:
            raise ExtractorError(u'Unable to extract video id and extension')
        # Split on the last dot only, so that a file name which itself
        # contains dots still yields exactly one id and one extension.
        video_id, extension = video_filename.rsplit('.', 1)

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': extension,  # Extension is always(?) mp4, but seems to be flv
            'thumbnail': None,
            'description': video_description,
        }

        return [info]