Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kinopoisk.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     dict_get,
   7     int_or_none,
   8 )
   9
  10
  11 class KinoPoiskIE(InfoExtractor):
  12     _GEO_COUNTRIES = ['RU']
  13     _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
  14     _TESTS = [{
  15         'url': 'https://www.kinopoisk.ru/film/81041/watch/',
  16         'md5': '4f71c80baea10dfa54a837a46111d326',
  17         'info_dict': {
  18             'id': '81041',
  19             'ext': 'mp4',
  20             'title': 'Алеша попович и тугарин змей',
  21             'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
  22             'thumbnail': r're:^https?://.*',
  23             'duration': 4533,
  24             'age_limit': 12,
  25         },
  26         'params': {
  27             'format': 'bestvideo',
  28         },
  29     }, {
  30         'url': 'https://www.kinopoisk.ru/film/81041',
  31         'only_matching': True,
  32     }]
  33
  34     def _real_extract(self, url):
  35         video_id = self._match_id(url)
  36
  37         webpage = self._download_webpage(
  38             'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
  39             query={'kpId': video_id})
  40
  41         data = self._parse_json(
  42             self._search_regex(
  43                 r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
  44                 webpage, 'data'),
  45             video_id)['models']
  46
  47         film = data['filmStatus']
  48         title = film.get('title') or film['originalTitle']
  49
  50         formats = self._extract_m3u8_formats(
  51             data['playlistEntity']['uri'], video_id, 'mp4',
  52             entry_protocol='m3u8_native', m3u8_id='hls')
  53         self._sort_formats(formats)
  54
  55         description = dict_get(
  56             film, ('descriptscription', 'description',
  57                    'shortDescriptscription', 'shortDescription'))
  58         thumbnail = film.get('coverUrl') or film.get('posterUrl')
  59         duration = int_or_none(film.get('duration'))
  60         age_limit = int_or_none(film.get('restrictionAge'))
  61
  62         return {
  63             'id': video_id,
  64             'title': title,
  65             'description': description,
  66             'thumbnail': thumbnail,
  67             'duration': duration,
  68             'age_limit': age_limit,
  69             'formats': formats,
  70         }