]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sportbox.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_urlparse
 
  14 class SportBoxIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)' 
  17         'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', 
  18         'md5': 'ff56a598c2cf411a9a38a69709e97079', 
  22             'title': 'Гонка 2  заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', 
  23             'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad', 
  24             'thumbnail': 're:^https?://.*\.jpg$', 
  25             'upload_date': '20140928', 
  29             'skip_download': True, 
  32         'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', 
  33         'only_matching': True, 
  35         'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', 
  36         'only_matching': True, 
  39     def _real_extract(self
, url
): 
  40         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  41         display_id 
= mobj
.group('display_id') 
  43         webpage 
= self
._download
_webpage
(url
, display_id
) 
  45         player 
= self
._search
_regex
( 
  46             r
'src="/?(vdl/player/[^"]+)"', webpage
, 'player') 
  48         title 
= self
._html
_search
_regex
( 
  49             [r
'"nodetitle"\s*:\s*"([^"]+)"', r
'class="node-header_{1,2}title">([^<]+)'], 
  51         description 
= self
._og
_search
_description
(webpage
) or self
._html
_search
_meta
( 
  52             'description', webpage
, 'description') 
  53         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  54         upload_date 
= unified_strdate(self
._html
_search
_meta
( 
  55             'dateCreated', webpage
, 'upload date')) 
  58             '_type': 'url_transparent', 
  59             'url': compat_urlparse
.urljoin(url
, '/%s' % player
), 
  60             'display_id': display_id
, 
  62             'description': description
, 
  63             'thumbnail': thumbnail
, 
  64             'upload_date': upload_date
, 
  68 class SportBoxEmbedIE(InfoExtractor
): 
  69     _VALID_URL 
= r
'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' 
  71         'url': 'http://news.sportbox.ru/vdl/player/ci/211355', 
  75             'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 
  76             'thumbnail': 're:^https?://.*\.jpg$', 
  80             'skip_download': True, 
  83         'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', 
  84         'only_matching': True, 
  88     def _extract_urls(webpage
): 
  90             r
'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', 
  93     def _real_extract(self
, url
): 
  94         video_id 
= self
._match
_id
(url
) 
  96         webpage 
= self
._download
_webpage
(url
, video_id
) 
 100         def cleanup_js(code
): 
 101             # desktop_advert_config contains complex Javascripts and we don't need it 
 102             return js_to_json(re
.sub(r
'desktop_advert_config.*', '', code
)) 
 104         jwplayer_data 
= self
._parse
_json
(self
._search
_regex
( 
 105             r
'(?s)player\.setup\(({.+?})\);', webpage
, 'jwplayer settings'), video_id
, 
 106             transform_source
=cleanup_js
) 
 108         hls_url 
= jwplayer_data
.get('hls_url') 
 110             formats
.extend(self
._extract
_m
3u8_formats
( 
 111                 hls_url
, video_id
, ext
='mp4', m3u8_id
='hls')) 
 113         rtsp_url 
= jwplayer_data
.get('rtsp_url') 
 120         self
._sort
_formats
(formats
) 
 122         title 
= jwplayer_data
['node_title'] 
 123         thumbnail 
= jwplayer_data
.get('image_url') 
 128             'thumbnail': thumbnail
,