Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/infoq.py

   1 import base64
   2 import re
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     compat_urllib_parse,
   7
   8     ExtractorError,
   9 )
  10
  11
  12 class InfoQIE(InfoExtractor):
  13     _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
  14     _TEST = {
  15         u"name": u"InfoQ",
  16         u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
  17         u"file": u"12-jan-pythonthings.mp4",
  18         u"info_dict": {
  19             u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
  20             u"title": u"A Few of My Favorite [Python] Things"
  21         },
  22         u"params": {
  23             u"skip_download": True
  24         }
  25     }
  26
  27     def _real_extract(self, url):
  28         mobj = re.match(self._VALID_URL, url)
  29
  30         webpage = self._download_webpage(url, video_id=url)
  31         self.report_extraction(url)
  32
  33         # Extract video URL
  34         mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
  35         if mobj is None:
  36             raise ExtractorError(u'Unable to extract video url')
  37         real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
  38         video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
  39
  40         # Extract title
  41         video_title = self._search_regex(r'contentTitle = "(.*?)";',
  42             webpage, u'title')
  43
  44         # Extract description
  45         video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
  46             webpage, u'description', fatal=False)
  47
  48         video_filename = video_url.split('/')[-1]
  49         video_id, extension = video_filename.split('.')
  50
  51         info = {
  52             'id': video_id,
  53             'url': video_url,
  54             'uploader': None,
  55             'upload_date': None,
  56             'title': video_title,
  57             'ext': extension, # Extension is always(?) mp4, but seems to be flv
  58             'thumbnail': None,
  59             'description': video_description,
  60         }
  61
  62         return [info]