Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rutv.py
Updated version 2017.09.24 from 'upstream/2017.09.24'
[youtubedl] / youtube_dl / extractor / rutv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 int_or_none
10 )
11
12
class RUTVIE(InfoExtractor):
    """Extractor for the embeddable player.rutv.ru / player.vgtrk.com player.

    Handles the flash container URLs, the ``iframe/{swf,video,live}/id/``
    URLs and the ``index/iframe/cast_id/`` URLs described by ``_VALID_URL``.
    """

    IE_DESC = 'RUTV.RU'
    _VALID_URL = r'''(?x)
        https?://
            (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
            (?P<path>
                flash\d+v/container\.swf\?id=|
                iframe/(?P<type>swf|video|live)/id/|
                index/iframe/cast_id/
            )
            (?P<id>\d+)
    '''

    _TESTS = [
        {
            'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
            'info_dict': {
                'id': '774471',
                'ext': 'mp4',
                'title': 'Монологи на все времена',
                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
                'duration': 2906,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
            'info_dict': {
                'id': '774016',
                'ext': 'mp4',
                'title': 'Чужой в семье Сталина',
                'description': '',
                'duration': 2539,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
            'info_dict': {
                'id': '766888',
                'ext': 'mp4',
                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
                'duration': 279,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
            'info_dict': {
                'id': '771852',
                'ext': 'mp4',
                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
                'duration': 3096,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
            'info_dict': {
                'id': '51499',
                'ext': 'flv',
                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
            },
            'skip': 'Translation has finished',
        },
        {
            'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
            'info_dict': {
                'id': '21',
                'ext': 'mp4',
                'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                'is_live': True,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
            'only_matching': True,
        },
    ]

    @classmethod
    def _extract_url(cls, webpage):
        """Find an embedded player URL in the HTML of ``webpage``.

        An ``<iframe src=...>`` pointing at the player is tried first, then
        an ``og:video`` ``<meta>`` tag pointing at the flash container.

        Returns the URL string, or None when neither pattern matches.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
        if mobj:
            return mobj.group('url')

        # The closing-quote backreference must stay outside the ``url`` group,
        # otherwise the returned URL ends with the quote character.
        mobj = re.search(
            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?)\2',
            webpage)
        if mobj:
            return mobj.group('url')

        return None

    def _real_extract(self, url):
        """Build the info dict for the player URL ``url``.

        Downloads the player JSON for the video (or live cast) id and turns
        the first entry of its media list into a list of formats.

        Raises ExtractorError (expected=True) when the JSON reports errors
        either at the top level or on the media entry.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        video_path = mobj.group('path')

        # _VALID_URL only admits these three path shapes, so one branch
        # always assigns video_type.
        if re.match(r'flash\d+v', video_path):
            video_type = 'video'
        elif video_path.startswith('iframe'):
            video_type = mobj.group('type')
            if video_type == 'swf':
                video_type = 'video'
        elif video_path.startswith('index/iframe/cast_id'):
            video_type = 'live'

        is_live = video_type == 'live'

        json_data = self._download_json(
            'http://player.rutv.ru/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id),
            video_id, 'Downloading JSON')

        errors = json_data.get('errors')
        if errors:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, errors), expected=True)

        playlist = json_data['data']['playlist']
        medialist = playlist['medialist']
        media = medialist[0]

        media_errors = media.get('errors')
        if media_errors:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, media_errors), expected=True)

        # Everything except the title and the sources is optional metadata:
        # a missing key must not abort the extraction.
        view_count = playlist.get('count_views')
        priority_transport = playlist.get('priority_transport')

        thumbnail = media.get('picture')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        description = media.get('anons')
        title = media['title']
        duration = int_or_none(media.get('duration'))

        formats = []

        for transport, links in media['sources'].items():
            # Formats served over the playlist's priority transport rank
            # above the others.
            preference = -1 if priority_transport == transport else -2
            for quality, format_url in links.items():
                if transport == 'rtmp':
                    rtmp_mobj = re.search(
                        r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', format_url)
                    if not rtmp_mobj:
                        continue
                    fmt = {
                        'url': rtmp_mobj.group('url'),
                        'play_path': rtmp_mobj.group('playpath'),
                        'app': rtmp_mobj.group('app'),
                        'page_url': 'http://player.rutv.ru',
                        'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
                        'rtmp_live': True,
                        'ext': 'flv',
                        # int_or_none: a non-numeric quality key yields None
                        # instead of raising ValueError.
                        'vbr': int_or_none(quality),
                        'preference': preference,
                    }
                elif transport == 'm3u8':
                    # The m3u8 helper returns ready-made format dicts.
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', preference=preference, m3u8_id='hls'))
                    continue
                else:
                    # Any other transport: use the link as-is (no explicit
                    # preference is set here).
                    fmt = {
                        'url': format_url
                    }
                fmt.update({
                    'width': width,
                    'height': height,
                    'format_id': '%s-%s' % (transport, quality),
                })
                formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'duration': duration,
            'formats': formats,
            'is_live': is_live,
        }