Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sportbox.py
Prepare to release.
[youtubedl] / youtube_dl / extractor / sportbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import js_to_json
8
9
class SportBoxEmbedIE(InfoExtractor):
    """Extractor for the embeddable video player pages on news.sportbox.ru."""

    # The numeric video id is either the last path segment
    # (/vdl/player/<something>/<id>) or the value of an ``id=``/``nid=``
    # query parameter.
    _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
            'info_dict': {
                'id': '211355',
                'ext': 'mp4',
                'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
            'params': {
                # m3u8 download, so skip fetching the media itself
                'skip_download': True,
            },
        },
        {
            'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every sportbox player ``<iframe>`` in *webpage*.

        Only double-quoted ``src`` attributes with an explicit http(s)
        scheme are matched; the result is a (possibly empty) list of URLs.
        """
        iframe_src_re = r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"'
        return re.findall(iframe_src_re, webpage)

    def _real_extract(self, url):
        """Build the info dict for the player page at *url*.

        The page is downloaded and the object literal passed to
        ``player.setup(...)`` is parsed as JSON.  An ``hls_url`` entry is
        expanded into m3u8 formats and an ``rtsp_url`` entry is added as a
        single ``rtsp`` format.  The title is read from the mandatory
        ``node_title`` key and the thumbnail from the optional
        ``image_url`` key.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        def strip_advert_config(js_code):
            # desktop_advert_config holds complex Javascript that is not
            # needed here, so the rest of that line is dropped before the
            # remaining code is converted to JSON.
            without_adverts = re.sub(r'desktop_advert_config.*', '', js_code)
            return js_to_json(without_adverts)

        raw_settings = self._search_regex(
            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings')
        player_settings = self._parse_json(
            raw_settings, video_id, transform_source=strip_advert_config)

        formats = []

        hls_url = player_settings.get('hls_url')
        if hls_url:
            hls_formats = self._extract_m3u8_formats(
                hls_url, video_id, ext='mp4', m3u8_id='hls')
            formats.extend(hls_formats)

        rtsp_url = player_settings.get('rtsp_url')
        if rtsp_url:
            rtsp_format = {
                'url': rtsp_url,
                'format_id': 'rtsp',
            }
            formats.append(rtsp_format)

        # Formats are sorted before the title lookup, matching the order in
        # which the two steps can fail.
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': player_settings['node_title'],
            'thumbnail': player_settings.get('image_url'),
            'formats': formats,
        }