Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mpora.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..utils import int_or_none
   5
   6
   7 class MporaIE(InfoExtractor):
   8     _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
   9     IE_NAME = 'MPORA'
  10
  11     _TEST = {
  12         'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
  13         'md5': 'a7a228473eedd3be741397cf452932eb',
  14         'info_dict': {
  15             'id': 'AAdo8okx4wiz',
  16             'ext': 'mp4',
  17             'title': 'Katy Curd -  Winter in the Forest',
  18             'duration': 416,
  19             'uploader': 'Peter Newman Media',
  20         },
  21     }
  22
  23     def _real_extract(self, url):
  24         video_id = self._match_id(url)
  25         webpage = self._download_webpage(url, video_id)
  26
  27         data_json = self._search_regex(
  28             [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
  29              r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
  30             webpage, 'json')
  31         data = self._parse_json(data_json, video_id)
  32
  33         uploader = data['info_overlay'].get('username')
  34         duration = data['video']['duration'] // 1000
  35         thumbnail = data['video']['encodings']['sd']['poster']
  36         title = data['info_overlay']['title']
  37
  38         formats = []
  39         for encoding_id, edata in data['video']['encodings'].items():
  40             for src in edata['sources']:
  41                 width_str = self._search_regex(
  42                     r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
  43                     False, default=None)
  44                 vcodec = src['type'].partition('/')[2]
  45
  46                 formats.append({
  47                     'format_id': encoding_id + '-' + vcodec,
  48                     'url': src['src'],
  49                     'vcodec': vcodec,
  50                     'width': int_or_none(width_str),
  51                 })
  52
  53         self._sort_formats(formats)
  54
  55         return {
  56             'id': video_id,
  57             'title': title,
  58             'formats': formats,
  59             'uploader': uploader,
  60             'duration': duration,
  61             'thumbnail': thumbnail,
  62         }