Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yandexvideo.py
Update upstream source from tag 'upstream/2020.05.08'
[youtubedl] / youtube_dl / extractor / yandexvideo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 determine_ext,
7 int_or_none,
8 url_or_none,
9 )
10
11
class YandexVideoIE(InfoExtractor):
    """Extractor for Yandex videos addressed by a hexadecimal stream id.

    Two URL shapes are accepted (see ``_VALID_URL``): yandex.ru pages that
    carry a ``stream_id=`` query parameter, and
    ``frontend.vh.yandex.ru/player/<id>`` player links.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
                            frontend\.vh\.yandex\.ru/player/
                        )
                        (?P<id>[\da-f]+)
                    '''
    _TESTS = [{
        'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
        'md5': '33955d7ae052f15853dc41f35f17581c',
        'info_dict': {
            'id': '4dbb262b4fe5cf15a215de4f34eee34d',
            'ext': 'mp4',
            'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 0,
            'duration': 30,
            'age_limit': 18,
        },
    }, {
        'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
        'only_matching': True,
    }, {
        'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
        'only_matching': True,
    }, {
        'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda',
        'only_matching': True,
    }, {
        # vod-episode, series episode
        'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee',
        'only_matching': True,
    }, {
        # episode, sports
        'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
        'only_matching': True,
    }, {
        # DASH with DRM
        'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The stream id is taken from *url*, the player JSON for that id is
        downloaded, and the media URL found in its ``content`` object is
        expanded into formats according to its file extension (HLS, DASH,
        or a single direct link).

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``timestamp``, ``duration``, ``series``,
        ``age_limit`` and ``formats``.
        """
        video_id = self._match_id(url)

        content = self._download_json(
            'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
            video_id, query={
                'stream_options': 'hires',
                'disable_trackings': 1,
            })['content']

        # Prefer the top-level URL; otherwise fall back to the first stream
        # entry that carries a valid one.  ``streams`` may be missing, empty
        # or contain malformed entries, so nothing is indexed blindly.
        content_url = url_or_none(content.get('content_url'))
        if not content_url:
            for stream in content.get('streams') or []:
                if not isinstance(stream, dict):
                    continue
                content_url = url_or_none(stream.get('url'))
                if content_url:
                    break

        title = content.get('title') or content.get('computed_title')

        if not content_url:
            # Never emit a format whose URL is None; an empty list lets the
            # "no formats" condition be reported downstream.
            # NOTE(review): presumably _sort_formats rejects an empty list
            # with an extraction error -- confirm against the base class.
            formats = []
        else:
            ext = determine_ext(content_url)
            if ext == 'm3u8':
                formats = self._extract_m3u8_formats(
                    content_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls')
            elif ext == 'mpd':
                formats = self._extract_mpd_formats(
                    content_url, video_id, mpd_id='dash')
            else:
                formats = [{'url': content_url}]

        self._sort_formats(formats)

        description = content.get('description')
        thumbnail = content.get('thumbnail')
        # First truthy candidate wins; a value of 0 falls through to the
        # next key, and the last candidate is used as-is.
        timestamp = (int_or_none(content.get('release_date'))
                     or int_or_none(content.get('release_date_ut'))
                     or int_or_none(content.get('start_time')))
        duration = int_or_none(content.get('duration'))
        series = content.get('program_title')
        age_limit = int_or_none(content.get('restriction_age'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'series': series,
            'age_limit': age_limit,
            'formats': formats,
        }