]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sportbox.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..compat
import compat_urlparse
13 class SportBoxIE(InfoExtractor
):
14 _VALID_URL
= r
'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)'
16 'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S',
17 'md5': 'ff56a598c2cf411a9a38a69709e97079',
21 'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
22 'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad',
23 'thumbnail': 're:^https?://.*\.jpg$',
24 'upload_date': '20140928',
28 'skip_download': True,
31 'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4',
32 'only_matching': True,
34 'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355',
35 'only_matching': True,
38 def _real_extract(self
, url
):
39 mobj
= re
.match(self
._VALID
_URL
, url
)
40 display_id
= mobj
.group('display_id')
42 webpage
= self
._download
_webpage
(url
, display_id
)
44 player
= self
._search
_regex
(
45 r
'src="/?(vdl/player/[^"]+)"', webpage
, 'player')
47 title
= self
._html
_search
_regex
(
48 [r
'"nodetitle"\s*:\s*"([^"]+)"', r
'class="node-header_{1,2}title">([^<]+)'],
50 description
= self
._og
_search
_description
(webpage
) or self
._html
_search
_meta
(
51 'description', webpage
, 'description')
52 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
53 upload_date
= unified_strdate(self
._html
_search
_meta
(
54 'dateCreated', webpage
, 'upload date'))
57 '_type': 'url_transparent',
58 'url': compat_urlparse
.urljoin(url
, '/%s' % player
),
59 'display_id': display_id
,
61 'description': description
,
62 'thumbnail': thumbnail
,
63 'upload_date': upload_date
,
67 class SportBoxEmbedIE(InfoExtractor
):
68 _VALID_URL
= r
'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
70 'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
74 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
75 'thumbnail': 're:^https?://.*\.jpg$',
79 'skip_download': True,
82 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
83 'only_matching': True,
87 def _extract_urls(webpage
):
89 r
'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
92 def _real_extract(self
, url
):
93 video_id
= self
._match
_id
(url
)
95 webpage
= self
._download
_webpage
(url
, video_id
)
97 hls
= self
._search
_regex
(
98 r
"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^
'\"]+)['\"]",
101 formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
103 title = self._search_regex(
104 r'sportboxPlayer\.node_title\s*=\s*"([^
"]+)"', webpage, 'title
')
106 thumbnail = self._search_regex(
107 r'sportboxPlayer\
.jwplayer_common_params\
.image\s
*=\s
*"([^"]+)"',
108 webpage, 'thumbnail', default=None)
113 'thumbnail': thumbnail,