Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/viqeo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     int_or_none,
   9     str_or_none,
  10     url_or_none,
  11 )
  12
  13
  14 class ViqeoIE(InfoExtractor):
  15     _VALID_URL = r'''(?x)
  16                         (?:
  17                             viqeo:|
  18                             https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
  19                             https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])=
  20                         )
  21                         (?P<id>[\da-f]+)
  22                     '''
  23     _TESTS = [{
  24         'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
  25         'md5': 'a169dd1a6426b350dca4296226f21e76',
  26         'info_dict': {
  27             'id': 'cde96f09d25f39bee837',
  28             'ext': 'mp4',
  29             'title': 'cde96f09d25f39bee837',
  30             'thumbnail': r're:^https?://.*\.jpg$',
  31             'duration': 76,
  32         },
  33     }, {
  34         'url': 'viqeo:cde96f09d25f39bee837',
  35         'only_matching': True,
  36     }, {
  37         'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
  38         'only_matching': True,
  39     }]
  40
  41     @staticmethod
  42     def _extract_urls(webpage):
  43         return [
  44             mobj.group('url')
  45             for mobj in re.finditer(
  46                 r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
  47                 webpage)]
  48
  49     def _real_extract(self, url):
  50         video_id = self._match_id(url)
  51
  52         webpage = self._download_webpage(
  53             'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
  54
  55         data = self._parse_json(
  56             self._search_regex(
  57                 r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
  58             video_id)
  59
  60         formats = []
  61         thumbnails = []
  62         for media_file in data['mediaFiles']:
  63             if not isinstance(media_file, dict):
  64                 continue
  65             media_url = url_or_none(media_file.get('url'))
  66             if not media_url or not media_url.startswith(('http', '//')):
  67                 continue
  68             media_type = str_or_none(media_file.get('type'))
  69             if not media_type:
  70                 continue
  71             media_kind = media_type.split('/')[0].lower()
  72             f = {
  73                 'url': media_url,
  74                 'width': int_or_none(media_file.get('width')),
  75                 'height': int_or_none(media_file.get('height')),
  76             }
  77             format_id = str_or_none(media_file.get('quality'))
  78             if media_kind == 'image':
  79                 f['id'] = format_id
  80                 thumbnails.append(f)
  81             elif media_kind in ('video', 'audio'):
  82                 is_audio = media_kind == 'audio'
  83                 f.update({
  84                     'format_id': 'audio' if is_audio else format_id,
  85                     'fps': int_or_none(media_file.get('fps')),
  86                     'vcodec': 'none' if is_audio else None,
  87                 })
  88                 formats.append(f)
  89         self._sort_formats(formats)
  90
  91         duration = int_or_none(data.get('duration'))
  92
  93         return {
  94             'id': video_id,
  95             'title': video_id,
  96             'duration': duration,
  97             'thumbnails': thumbnails,
  98             'formats': formats,
  99         }