Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mpora.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..utils import int_or_none
   5
   6
   7 class MporaIE(InfoExtractor):
   8     _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
   9     IE_NAME = 'MPORA'
  10
  11     _TEST = {
  12         'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
  13         'md5': 'a7a228473eedd3be741397cf452932eb',
  14         'info_dict': {
  15             'id': 'AAdo8okx4wiz',
  16             'ext': 'mp4',
  17             'title': 'Katy Curd -  Winter in the Forest',
  18             'duration': 416,
  19             'uploader': 'Peter Newman Media',
  20         },
  21     }
  22
  23     def _real_extract(self, url):
  24         video_id = self._match_id(url)
  25         webpage = self._download_webpage(url, video_id)
  26
  27         data_json = self._search_regex(
  28             r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
  29         data = self._parse_json(data_json, video_id)
  30
  31         uploader = data['info_overlay'].get('username')
  32         duration = data['video']['duration'] // 1000
  33         thumbnail = data['video']['encodings']['sd']['poster']
  34         title = data['info_overlay']['title']
  35
  36         formats = []
  37         for encoding_id, edata in data['video']['encodings'].items():
  38             for src in edata['sources']:
  39                 width_str = self._search_regex(
  40                     r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
  41                     False, default=None)
  42                 vcodec = src['type'].partition('/')[2]
  43
  44                 formats.append({
  45                     'format_id': encoding_id + '-' + vcodec,
  46                     'url': src['src'],
  47                     'vcodec': vcodec,
  48                     'width': int_or_none(width_str),
  49                 })
  50
  51         self._sort_formats(formats)
  52
  53         return {
  54             'id': video_id,
  55             'title': title,
  56             'formats': formats,
  57             'uploader': uploader,
  58             'duration': duration,
  59             'thumbnail': thumbnail,
  60         }