]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kontrtube.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  13 class KontrTubeIE(InfoExtractor
): 
  15     IE_DESC 
= 'KontrTube.ru - Труба зовёт' 
  16     _VALID_URL 
= r
'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/' 
  19         'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', 
  20         'md5': '975a991a4926c9a85f383a736a2e6b80', 
  23             'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag', 
  25             'title': 'Над олимпийской деревней в Сочи поднят российский флаг', 
  26             'description': 'md5:80edc4c613d5887ae8ccf1d59432be41', 
  27             'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg', 
  32     def _real_extract(self
, url
): 
  33         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  34         video_id 
= mobj
.group('id') 
  35         display_id 
= mobj
.group('display_id') 
  37         webpage 
= self
._download
_webpage
( 
  38             url
, display_id
, 'Downloading page') 
  40         video_url 
= self
._search
_regex
( 
  41             r
"video_url\s*:\s*'(.+?)/?',", webpage
, 'video URL') 
  42         thumbnail 
= self
._search
_regex
( 
  43             r
"preview_url\s*:\s*'(.+?)/?',", webpage
, 'thumbnail', fatal
=False) 
  44         title 
= self
._html
_search
_regex
( 
  45             r
'(?s)<h2>(.+?)</h2>', webpage
, 'title') 
  46         description 
= self
._html
_search
_meta
( 
  47             'description', webpage
, 'description') 
  49         duration 
= self
._search
_regex
( 
  50             r
'Длительность: <em>([^<]+)</em>', webpage
, 'duration', fatal
=False) 
  52             duration 
= parse_duration(duration
.replace('мин', 'min').replace('сек', 'sec')) 
  54         view_count 
= self
._search
_regex
( 
  55             r
'Просмотров: <em>([^<]+)</em>', 
  56             webpage
, 'view count', fatal
=False) 
  58             view_count 
= int_or_none(view_count
.replace(' ', '')) 
  60         comment_count 
= int_or_none(self
._search
_regex
( 
  61             r
'Комментарии \((\d+)\)<', webpage
, ' comment count', fatal
=False)) 
  65             'display_id': display_id
, 
  67             'thumbnail': thumbnail
, 
  69             'description': description
, 
  71             'view_count': int_or_none(view_count
), 
  72             'comment_count': int_or_none(comment_count
),