# encoding: utf-8
# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/lifenews.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
# NOTE(review): ExtractorError, int_or_none and unified_strdate are used by
# LifeNewsIE below, but their import is not visible in this extract --
# presumably they come from the package's shared utilities module; confirm
# and restore that import.


class LifeNewsIE(InfoExtractor):
    """Extractor for news pages on lifenews.ru.

    Matches both the regular and the ``/mobile/`` form of a news URL and
    pulls every ``<video>`` element found on the corresponding news page.
    """

    IE_DESC = 'LIFE | NEWS'
    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'

    # NOTE(review): the enclosing structure of this test case (the
    # ``_TEST = {`` / ``'info_dict': {`` lines) was lost in the extract and
    # has been reconstructed; further expected fields (likely 'id' and 'ext')
    # may be missing -- confirm against the upstream file.
    _TEST = {
        'url': 'http://lifenews.ru/news/126342',
        'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
        'info_dict': {
            'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
            'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
            # Raw string: same runtime value, but avoids the invalid "\."
            # escape warning on recent Python 3 versions.
            'thumbnail': r're:http://.*\.jpg',
            'upload_date': '20140130',
        },
    }

    def _real_extract(self, url):
        """Extract the video(s) embedded in a lifenews.ru news page.

        :param url: a URL matching ``_VALID_URL``.
        :returns: a single info dict when the page holds exactly one video,
            otherwise a list with one info dict per video.
        :raises ExtractorError: when no ``<video>`` element is found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Always fetch the canonical (non-mobile) page, whatever form of the
        # URL was supplied.
        webpage = self._download_webpage(
            'http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')

        # Each match is a (poster, video) tuple, in capture-group order.
        videos = re.findall(
            r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
        # NOTE(review): this guard line is missing from the extract; without
        # it the raise would be unconditional and the rest unreachable.
        if not videos:
            raise ExtractorError('No media links available for %s' % video_id)

        title = self._og_search_title(webpage)
        # Strip the site-wide suffix appended to every page title.
        TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]

        description = self._og_search_description(webpage)

        # The remaining fields are optional: fatal=False lets the extraction
        # continue when the markup is absent.
        view_count = self._html_search_regex(
            r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
        comment_count = self._html_search_regex(
            r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)

        upload_date = self._html_search_regex(
            r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
        if upload_date is not None:
            upload_date = unified_strdate(upload_date)

        def make_entry(video_id, media, video_number=None):
            """Build the info dict for one (poster, video) match.

            ``video_number`` is appended to the title when the page carries
            several videos, so the entries can be told apart.
            """
            # NOTE(review): the 'id' and 'url' entries are not visible in the
            # extract; reconstructed from the otherwise unused ``video_id``
            # parameter and the second regex capture group -- confirm.
            return {
                'id': video_id,
                'url': media[1],
                'thumbnail': media[0],
                'title': title if video_number is None else '%s-video%s' % (title, video_number),
                'description': description,
                'view_count': int_or_none(view_count),
                'comment_count': int_or_none(comment_count),
                'upload_date': upload_date,
            }

        # NOTE(review): the branch condition is missing from the extract;
        # reconstructed from the two return statements that follow it.
        if len(videos) == 1:
            return make_entry(video_id, videos[0])
        return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)]