Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/viqeo.py
Update upstream source from tag 'upstream/2020.06.16.1'
[youtubedl] / youtube_dl / extractor / viqeo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 int_or_none,
9 str_or_none,
10 url_or_none,
11 )
12
13
class ViqeoIE(InfoExtractor):
    """Extractor for videos served through the viqeo.tv player.

    Three URL shapes are recognised, each ending in a hexadecimal video id:

    * the internal ``viqeo:<id>`` scheme;
    * the ``cdn.viqeo.tv/embed/`` player page (``vid=`` query parameter);
    * the ``api.viqeo.tv/v<N>/data/startup`` endpoint (``video[]=`` query
      parameter, written either literally or percent-encoded).
    """

    # The ``?`` introducing the query string is escaped in both HTTP
    # alternatives.  Left unescaped after ``startup`` it would act as a
    # quantifier making the final ``p`` optional instead of matching the
    # literal question mark.
    _VALID_URL = r'''(?x)
                        (?:
                            viqeo:|
                            https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
                            https?://api\.viqeo\.tv/v\d+/data/startup\?.*?\bvideo(?:%5B%5D|\[\])=
                        )
                        (?P<id>[\da-f]+)
                    '''
    _TESTS = [{
        'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
        'md5': 'a169dd1a6426b350dca4296226f21e76',
        'info_dict': {
            'id': 'cde96f09d25f39bee837',
            'ext': 'mp4',
            'title': 'cde96f09d25f39bee837',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 76,
        },
    }, {
        'url': 'viqeo:cde96f09d25f39bee837',
        'only_matching': True,
    }, {
        'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every viqeo embed ``<iframe>`` in *webpage*.

        Only iframes pointing at ``cdn.viqeo.tv/embed/`` with a ``vid=``
        parameter are collected; the scheme may be ``http:``, ``https:`` or
        omitted (protocol-relative).  URLs are returned in document order.
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        Whatever form *url* takes, the canonical embed page for its video id
        is downloaded and the JSON object assigned to ``SLOT_DATA`` on that
        page is parsed.  Each entry of its ``mediaFiles`` list becomes either
        a thumbnail (``image/*`` type) or a format (``video/*`` or
        ``audio/*`` type); entries of any other type are ignored.

        Raises ``KeyError`` when the slot data has no ``mediaFiles`` key.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)

        data = self._parse_json(
            self._search_regex(
                r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
            video_id)

        formats = []
        thumbnails = []
        for media_file in data['mediaFiles']:
            # Skip malformed entries rather than failing the whole extraction.
            if not isinstance(media_file, dict):
                continue
            media_url = url_or_none(media_file.get('url'))
            if not media_url or not media_url.startswith(('http', '//')):
                continue
            media_type = str_or_none(media_file.get('type'))
            if not media_type:
                continue
            # Only the major part of the type ("video" in "video/mp4")
            # decides how the entry is classified.
            media_kind = media_type.split('/')[0].lower()
            f = {
                'url': media_url,
                'width': int_or_none(media_file.get('width')),
                'height': int_or_none(media_file.get('height')),
            }
            format_id = str_or_none(media_file.get('quality'))
            if media_kind == 'image':
                # Thumbnail entries carry their identifier under 'id'.
                f['id'] = format_id
                thumbnails.append(f)
            elif media_kind in ('video', 'audio'):
                is_audio = media_kind == 'audio'
                f.update({
                    # Audio entries get the fixed id 'audio' and are marked
                    # as having no video codec.
                    'format_id': 'audio' if is_audio else format_id,
                    'fps': int_or_none(media_file.get('fps')),
                    'vcodec': 'none' if is_audio else None,
                })
                formats.append(f)
        self._sort_formats(formats)

        duration = int_or_none(data.get('duration'))

        return {
            'id': video_id,
            # The slot data is not consulted for a title here, so the id
            # doubles as the title.
            'title': video_id,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }