Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/lenta.py
New upstream version 2020.06.16
[youtubedl] / youtube_dl / extractor / lenta.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6
class LentaIE(InfoExtractor):
    """Extractor for lenta.ru article pages.

    An article page is scanned for an EaglePlatform ``vid`` value; when one
    is present extraction is handed to the EaglePlatform extractor, otherwise
    the URL is passed on to the Generic extractor.
    """

    # Matches <section>/<digits>/<digits>/<digits>/<slug>; the slug is the id.
    _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
            'info_dict': {
                'id': '964400',
                'ext': 'mp4',
                'title': 'Надежду Савченко задержали',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 61,
                'view_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # EaglePlatform iframe embed
            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
            'info_dict': {
                'id': '227304',
                'ext': 'mp4',
                'title': 'Навальный вышел на свободу',
                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 87,
                'view_count': int,
                'age_limit': 0,
            },
            'params': {
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Resolve a lenta.ru article URL to a result for another extractor.

        Downloads the page and searches it for a numeric ``vid`` value. If
        found, returns a url_result targeting the EaglePlatform extractor on
        the ``lentaru.media.eagleplatform.com`` host; if not, returns a
        url_result that re-dispatches the same URL to the Generic extractor.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # default=None makes a missing id a soft miss instead of an error,
        # so the Generic fallback below can take over.
        eagle_id = self._search_regex(
            r'vid\s*:\s*["\']?(\d+)', page, 'eagleplatform id',
            default=None)

        if not eagle_id:
            return self.url_result(url, ie='Generic')

        eagle_url = 'eagleplatform:lentaru.media.eagleplatform.com:%s' % eagle_id
        return self.url_result(
            eagle_url, ie='EaglePlatform', video_id=eagle_id)
53 return self.url_result(url, ie='Generic')