]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sportbox.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_urlparse
 
  13 class SportBoxIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)' 
  16         'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', 
  17         'md5': 'ff56a598c2cf411a9a38a69709e97079', 
  21             'title': 'Гонка 2  заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', 
  22             'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad', 
  23             'thumbnail': 're:^https?://.*\.jpg$', 
  24             'upload_date': '20140928', 
  28             'skip_download': True, 
  31         'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', 
  32         'only_matching': True, 
  34         'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', 
  35         'only_matching': True, 
  38     def _real_extract(self
, url
): 
  39         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  40         display_id 
= mobj
.group('display_id') 
  42         webpage 
= self
._download
_webpage
(url
, display_id
) 
  44         player 
= self
._search
_regex
( 
  45             r
'src="/?(vdl/player/[^"]+)"', webpage
, 'player') 
  47         title 
= self
._html
_search
_regex
( 
  48             [r
'"nodetitle"\s*:\s*"([^"]+)"', r
'class="node-header_{1,2}title">([^<]+)'], 
  50         description 
= self
._og
_search
_description
(webpage
) or self
._html
_search
_meta
( 
  51             'description', webpage
, 'description') 
  52         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  53         upload_date 
= unified_strdate(self
._html
_search
_meta
( 
  54             'dateCreated', webpage
, 'upload date')) 
  57             '_type': 'url_transparent', 
  58             'url': compat_urlparse
.urljoin(url
, '/%s' % player
), 
  59             'display_id': display_id
, 
  61             'description': description
, 
  62             'thumbnail': thumbnail
, 
  63             'upload_date': upload_date
, 
  67 class SportBoxEmbedIE(InfoExtractor
): 
  68     _VALID_URL 
= r
'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' 
  70         'url': 'http://news.sportbox.ru/vdl/player/ci/211355', 
  74             'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 
  75             'thumbnail': 're:^https?://.*\.jpg$', 
  79             'skip_download': True, 
  82         'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', 
  83         'only_matching': True, 
  87     def _extract_urls(webpage
): 
  89             r
'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', 
  92     def _real_extract(self
, url
): 
  93         video_id 
= self
._match
_id
(url
) 
  95         webpage 
= self
._download
_webpage
(url
, video_id
) 
  97         hls 
= self
._search
_regex
( 
  98             r
"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^
'\"]+)['\"]", 
 101         formats = self._extract_m3u8_formats(hls, video_id, 'mp4') 
 103         title = self._search_regex( 
 104             r'sportboxPlayer\.node_title\s*=\s*"([^
"]+)"', webpage, 'title
') 
 106         thumbnail = self._search_regex( 
 107             r'sportboxPlayer\
.jwplayer_common_params\
.image\s
*=\s
*"([^"]+)"', 
 108             webpage, 'thumbnail', default=None) 
 113             'thumbnail': thumbnail,