Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kinopoisk.py
debian/control: Mark compliance with Debian policy 4.1.5.
[youtubedl] / youtube_dl / extractor / kinopoisk.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 dict_get,
7 int_or_none,
8 )
9
10
class KinoPoiskIE(InfoExtractor):
    """Extractor for kinopoisk.ru film pages (URLs of the form /film/<id>)."""

    _GEO_COUNTRIES = ['RU']
    _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://www.kinopoisk.ru/film/81041/watch/',
            'md5': '4f71c80baea10dfa54a837a46111d326',
            'info_dict': {
                'id': '81041',
                'ext': 'mp4',
                'title': 'Алеша попович и тугарин змей',
                'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
                'thumbnail': r're:^https?://.*',
                'duration': 4533,
                'age_limit': 12,
            },
            'params': {
                'format': 'bestvideo',
            },
        },
        {
            'url': 'https://www.kinopoisk.ru/film/81041',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the film referenced by *url*.

        The numeric film id is taken from the URL, the ott-widget page is
        downloaded for that id, and the JSON embedded in its
        ``application/json`` script tag supplies both the film metadata
        (``models.filmStatus``) and the HLS playlist location
        (``models.playlistEntity.uri``).
        """
        video_id = self._match_id(url)

        widget_page = self._download_webpage(
            'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
            query={'kpId': video_id})

        # The widget ships its state as JSON inside a <script> element;
        # capture everything up to the next '<'.
        embedded_json = self._search_regex(
            r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
            webpage=widget_page, name='data') if False else self._search_regex(
            r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
            widget_page, 'data')
        models = self._parse_json(embedded_json, video_id)['models']

        film = models['filmStatus']

        # Prefer the localized title; the original title is the mandatory
        # fallback (a missing key raises here, before any playlist fetch).
        title = film.get('title')
        if not title:
            title = film['originalTitle']

        playlist_uri = models['playlistEntity']['uri']
        formats = self._extract_m3u8_formats(
            playlist_uri, video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        # NOTE(review): the misspelled keys are looked up ahead of the
        # correctly spelled ones; presumably the service has emitted them at
        # some point -- confirm against live data before removing.
        description_keys = (
            'descriptscription', 'description',
            'shortDescriptscription', 'shortDescription',
        )

        return {
            'id': video_id,
            'title': title,
            'description': dict_get(film, description_keys),
            'thumbnail': film.get('coverUrl') or film.get('posterUrl'),
            'duration': int_or_none(film.get('duration')),
            'age_limit': int_or_none(film.get('restrictionAge')),
            'formats': formats,
        }