]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kontrtube.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
13 class KontrTubeIE(InfoExtractor
):
15 IE_DESC
= 'KontrTube.ru - Труба зовёт'
16 _VALID_URL
= r
'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
19 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
20 'md5': '975a991a4926c9a85f383a736a2e6b80',
23 'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
25 'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
26 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
27 'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
32 def _real_extract(self
, url
):
33 mobj
= re
.match(self
._VALID
_URL
, url
)
34 video_id
= mobj
.group('id')
35 display_id
= mobj
.group('display_id')
37 webpage
= self
._download
_webpage
(
38 url
, display_id
, 'Downloading page')
40 video_url
= self
._search
_regex
(
41 r
"video_url\s*:\s*'(.+?)/?',", webpage
, 'video URL')
42 thumbnail
= self
._search
_regex
(
43 r
"preview_url\s*:\s*'(.+?)/?',", webpage
, 'thumbnail', fatal
=False)
44 title
= self
._html
_search
_regex
(
45 r
'(?s)<h2>(.+?)</h2>', webpage
, 'title')
46 description
= self
._html
_search
_meta
(
47 'description', webpage
, 'description')
49 duration
= self
._search
_regex
(
50 r
'Длительность: <em>([^<]+)</em>', webpage
, 'duration', fatal
=False)
52 duration
= parse_duration(duration
.replace('мин', 'min').replace('сек', 'sec'))
54 view_count
= self
._search
_regex
(
55 r
'Просмотров: <em>([^<]+)</em>',
56 webpage
, 'view count', fatal
=False)
58 view_count
= int_or_none(view_count
.replace(' ', ''))
60 comment_count
= int_or_none(self
._search
_regex
(
61 r
'Комментарии \((\d+)\)<', webpage
, ' comment count', fatal
=False))
65 'display_id': display_id
,
67 'thumbnail': thumbnail
,
69 'description': description
,
71 'view_count': int_or_none(view_count
),
72 'comment_count': int_or_none(comment_count
),