Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/iprima.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6
   7 from .common import InfoExtractor
   8 from ..utils import (
   9     determine_ext,
  10     js_to_json,
  11 )
  12
  13
  14 class IPrimaIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
  16     _GEO_BYPASS = False
  17
  18     _TESTS = [{
  19         'url': 'https://prima.iprima.cz/particka/92-epizoda',
  20         'info_dict': {
  21             'id': 'p51388',
  22             'ext': 'mp4',
  23             'title': 'Partička (92)',
  24             'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
  25         },
  26         'params': {
  27             'skip_download': True,  # m3u8 download
  28         },
  29     }, {
  30         'url': 'https://cnn.iprima.cz/videa/70-epizoda',
  31         'info_dict': {
  32             'id': 'p681554',
  33             'ext': 'mp4',
  34             'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
  35         },
  36         'params': {
  37             'skip_download': True,  # m3u8 download
  38         },
  39     }, {
  40         'url': 'http://play.iprima.cz/particka/particka-92',
  41         'only_matching': True,
  42     }, {
  43         # geo restricted
  44         'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
  45         'only_matching': True,
  46     }, {
  47         # iframe api.play-backend.iprima.cz
  48         'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
  49         'only_matching': True,
  50     }, {
  51         # iframe prima.iprima.cz
  52         'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
  53         'only_matching': True,
  54     }, {
  55         'url': 'http://www.iprima.cz/filmy/desne-rande',
  56         'only_matching': True,
  57     }, {
  58         'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
  59         'only_matching': True,
  60     }, {
  61         'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
  62         'only_matching': True,
  63     }, {
  64         'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
  65         'only_matching': True,
  66     }, {
  67         'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
  68         'only_matching': True,
  69     }, {
  70         'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
  71         'only_matching': True,
  72     }]
  73
  74     def _real_extract(self, url):
  75         video_id = self._match_id(url)
  76
  77         self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
  78
  79         webpage = self._download_webpage(url, video_id)
  80
  81         title = self._og_search_title(
  82             webpage, default=None) or self._search_regex(
  83             r'<h1>([^<]+)', webpage, 'title')
  84
  85         video_id = self._search_regex(
  86             (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
  87              r'data-product="([^"]+)">',
  88              r'id=["\']player-(p\d+)"',
  89              r'playerId\s*:\s*["\']player-(p\d+)'),
  90             webpage, 'real id')
  91
  92         playerpage = self._download_webpage(
  93             'http://play.iprima.cz/prehravac/init',
  94             video_id, note='Downloading player', query={
  95                 '_infuse': 1,
  96                 '_ts': round(time.time()),
  97                 'productId': video_id,
  98             }, headers={'Referer': url})
  99
 100         formats = []
 101
 102         def extract_formats(format_url, format_key=None, lang=None):
 103             ext = determine_ext(format_url)
 104             new_formats = []
 105             if format_key == 'hls' or ext == 'm3u8':
 106                 new_formats = self._extract_m3u8_formats(
 107                     format_url, video_id, 'mp4', entry_protocol='m3u8_native',
 108                     m3u8_id='hls', fatal=False)
 109             elif format_key == 'dash' or ext == 'mpd':
 110                 return
 111                 new_formats = self._extract_mpd_formats(
 112                     format_url, video_id, mpd_id='dash', fatal=False)
 113             if lang:
 114                 for f in new_formats:
 115                     if not f.get('language'):
 116                         f['language'] = lang
 117             formats.extend(new_formats)
 118
 119         options = self._parse_json(
 120             self._search_regex(
 121                 r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
 122                 playerpage, 'player options', default='{}'),
 123             video_id, transform_source=js_to_json, fatal=False)
 124         if options:
 125             for key, tracks in options.get('tracks', {}).items():
 126                 if not isinstance(tracks, list):
 127                     continue
 128                 for track in tracks:
 129                     src = track.get('src')
 130                     if src:
 131                         extract_formats(src, key.lower(), track.get('lang'))
 132
 133         if not formats:
 134             for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
 135                 extract_formats(src)
 136
 137         if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
 138             self.raise_geo_restricted(countries=['CZ'])
 139
 140         self._sort_formats(formats)
 141
 142         return {
 143             'id': video_id,
 144             'title': title,
 145             'thumbnail': self._og_search_thumbnail(webpage, default=None),
 146             'formats': formats,
 147             'description': self._og_search_description(webpage, default=None),
 148         }