]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sportbox.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
15 class SportBoxIE(InfoExtractor
):
16 _VALID_URL
= r
'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
18 'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
22 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
23 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
24 'thumbnail': r
're:^https?://.*\.jpg$',
27 'timestamp': 1426237001,
28 'upload_date': '20150313',
32 'skip_download': True,
35 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
36 'only_matching': True,
38 'url': 'https://news.sportbox.ru/vdl/player/media/193095',
39 'only_matching': True,
41 'url': 'https://news.sportbox.ru/vdl/player/media/109158',
42 'only_matching': True,
44 'url': 'https://matchtv.ru/vdl/player/media/109158',
45 'only_matching': True,
49 def _extract_urls(webpage
):
51 r
'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
54 def _real_extract(self
, url
):
55 video_id
= self
._match
_id
(url
)
57 webpage
= self
._download
_webpage
(url
, video_id
)
59 sources
= self
._parse
_json
(
61 r
'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
63 video_id
, transform_source
=js_to_json
)
66 for source
in sources
:
67 src
= source
.get('src')
70 if determine_ext(src
) == 'm3u8':
71 formats
.extend(self
._extract
_m
3u8_formats
(
72 src
, video_id
, 'mp4', entry_protocol
='m3u8_native',
73 m3u8_id
='hls', fatal
=False))
78 self
._sort
_formats
(formats
)
80 player
= self
._parse
_json
(
82 r
'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage
,
83 'player options', default
='{}'),
84 video_id
, transform_source
=js_to_json
)
85 media_id
= player
['mediaId']
87 info
= self
._search
_json
_ld
(webpage
, media_id
, default
={})
89 view_count
= int_or_none(self
._search
_regex
(
90 r
'Просмотров\s*:\s*(\d+)', webpage
, 'view count', default
=None))
92 return merge_dicts(info
, {
94 'title': self
._og
_search
_title
(webpage
, default
=None) or media_id
,
95 'thumbnail': player
.get('poster'),
96 'duration': int_or_none(player
.get('duration')),
97 'view_count': view_count
,