]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sportbox.py
becdf658f6e0ce8b209dffc0ce4c96a2857099dc
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  13 class SportBoxIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://news\.sportbox\.ru/Vidy_sporta/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)' 
  17             'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', 
  18             'md5': 'ff56a598c2cf411a9a38a69709e97079', 
  22                 'title': 'Гонка 2  заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', 
  23                 'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed', 
  24                 'thumbnail': 're:^https?://.*\.jpg$', 
  25                 'timestamp': 1411896237, 
  26                 'upload_date': '20140928', 
  31                 'skip_download': True, 
  34             'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', 
  35             'only_matching': True, 
  39     def _real_extract(self
, url
): 
  40         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  41         display_id 
= mobj
.group('display_id') 
  43         webpage 
= self
._download
_webpage
(url
, display_id
) 
  45         video_id 
= self
._search
_regex
( 
  46             r
'src="/vdl/player/media/(\d+)"', webpage
, 'video id') 
  48         player 
= self
._download
_webpage
( 
  49             'http://news.sportbox.ru/vdl/player/media/%s' % video_id
, 
  50             display_id
, 'Downloading player webpage') 
  52         hls 
= self
._search
_regex
( 
  53             r
"var\s+original_hls_file\s*=\s*'([^']+)'", player
, 'hls file') 
  55         formats 
= self
._extract
_m
3u8_formats
(hls
, display_id
, 'mp4') 
  57         title 
= self
._html
_search
_regex
( 
  58             r
'<h1 itemprop="name">([^<]+)</h1>', webpage
, 'title') 
  59         description 
= self
._html
_search
_regex
( 
  60             r
'(?s)<div itemprop="description">(.+?)</div>', webpage
, 'description', fatal
=False) 
  61         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  62         timestamp 
= parse_iso8601(self
._search
_regex
( 
  63             r
'<span itemprop="uploadDate">([^<]+)</span>', webpage
, 'timestamp', fatal
=False)) 
  64         duration 
= parse_duration(self
._html
_search
_regex
( 
  65             r
'<meta itemprop="duration" content="PT([^"]+)">', webpage
, 'duration', fatal
=False)) 
  69             'display_id': display_id
, 
  71             'description': description
, 
  72             'thumbnail': thumbnail
, 
  73             'timestamp': timestamp
,