Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/wistia.py

   1 import json
   2 import re
   3
   4 from .common import InfoExtractor
   5
   6
   7 class WistiaIE(InfoExtractor):
   8     _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
   9
  10     _TEST = {
  11         u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
  12         u"file": u"sh7fpupwlt.mov",
  13         u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
  14         u"info_dict": {
  15             u"title": u"cfh_resourceful_zdkh_final_1"
  16         },
  17     }
  18
  19     def _real_extract(self, url):
  20         mobj = re.match(self._VALID_URL, url)
  21         video_id = mobj.group('id')
  22
  23         webpage = self._download_webpage(url, video_id)
  24         data_json = self._html_search_regex(
  25             r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data')
  26
  27         data = json.loads(data_json)
  28
  29         formats = []
  30         thumbnails = []
  31         for atype, a in data['assets'].items():
  32             if atype == 'still':
  33                 thumbnails.append({
  34                     'url': a['url'],
  35                     'resolution': '%dx%d' % (a['width'], a['height']),
  36                 })
  37                 continue
  38             if atype == 'preview':
  39                 continue
  40             formats.append({
  41                 'format_id': atype,
  42                 'url': a['url'],
  43                 'width': a['width'],
  44                 'height': a['height'],
  45                 'filesize': a['size'],
  46                 'ext': a['ext'],
  47             })
  48         formats.sort(key=lambda a: a['filesize'])
  49
  50         return {
  51             'id': video_id,
  52             'title': data['name'],
  53             'formats': formats,
  54             'thumbnails': thumbnails,
  55         }