]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nova.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  17 class NovaEmbedIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)' 
  20         'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1', 
  21         'md5': 'b3834f6de5401baabf31ed57456463f7', 
  26             'thumbnail': r
're:^https?://.*\.jpg', 
  31     def _real_extract(self
, url
): 
  32         video_id 
= self
._match
_id
(url
) 
  34         webpage 
= self
._download
_webpage
(url
, video_id
) 
  36         bitrates 
= self
._parse
_json
( 
  38                 r
'(?s)bitrates\s*=\s*({.+?})\s*;', webpage
, 'formats'), 
  39             video_id
, transform_source
=js_to_json
) 
  41         QUALITIES 
= ('lq', 'mq', 'hq', 'hd') 
  42         quality_key 
= qualities(QUALITIES
) 
  45         for format_id
, format_list 
in bitrates
.items(): 
  46             if not isinstance(format_list
, list): 
  48             for format_url 
in format_list
: 
  49                 format_url 
= url_or_none(format_url
) 
  56                 for quality 
in QUALITIES
: 
  57                     if '%s.mp4' % quality 
in format_url
: 
  58                         f_id 
+= '-%s' % quality
 
  60                             'quality': quality_key(quality
), 
  61                             'format_note': quality
.upper(), 
  66         self
._sort
_formats
(formats
) 
  68         title 
= self
._og
_search
_title
( 
  69             webpage
, default
=None) or self
._search
_regex
( 
  70             (r
'<value>(?P<title>[^<]+)', 
  71              r
'videoTitle\s*:\s*(["\'])(?P
<value
>(?
:(?
!\
1).)+)\
1'), webpage, 
  72             'title
', group='value
') 
  73         thumbnail = self._og_search_thumbnail( 
  74             webpage, default=None) or self._search_regex( 
  75             r'poster\s
*:\s
*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 
  76             'thumbnail', fatal=False, group='value') 
  77         duration = int_or_none(self._search_regex( 
  78             r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) 
  83             'thumbnail': thumbnail, 
  89 class NovaIE(InfoExtractor): 
  90     IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' 
  91     _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' 
  93         'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', 
  94         'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', 
  97             'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci', 
  99             'title': 'Podzemní nemocnice v pražské Krči', 
 100             'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', 
 101             'thumbnail': r're:^https?://.*\.(?:jpg)', 
 104         'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', 
 108             'title': 'Zaklínač 3: Divoký hon', 
 109             'description': 're:.*Pokud se stejně jako my nemůžete.*', 
 110             'thumbnail': r're:https?://.*\.jpg(\?.*)?', 
 111             'upload_date': '20150521', 
 115             'skip_download': True, 
 118         # media.cms.nova.cz embed 
 119         'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil', 
 123             'title': '2180. díl', 
 124             'thumbnail': r're:^https?://.*\.jpg', 
 128             'skip_download': True, 
 130         'add_ie': [NovaEmbedIE.ie_key()], 
 132         'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html', 
 133         'only_matching': True, 
 135         'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', 
 136         'only_matching': True, 
 138         'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html', 
 139         'only_matching': True, 
 141         'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html', 
 142         'only_matching': True, 
 144         'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html', 
 145         'only_matching': True, 
 148     def _real_extract(self, url): 
 149         mobj = re.match(self._VALID_URL, url) 
 150         display_id = mobj.group('id') 
 151         site = mobj.group('site') 
 153         webpage = self._download_webpage(url, display_id) 
 156         embed_id = self._search_regex( 
 157             r'<iframe[^>]+\bsrc=["\'](?
:https?
:)?
//media\
.cms\
.nova\
.cz
/embed
/([^
/?
#&]+)', 
 158             webpage
, 'embed url', default
=None) 
 160             return self
.url_result( 
 161                 'https://media.cms.nova.cz/embed/%s' % embed_id
, 
 162                 ie
=NovaEmbedIE
.ie_key(), video_id
=embed_id
) 
 164         video_id 
= self
._search
_regex
( 
 165             [r
"(?:media|video_id)\s*:\s*'(\d+)'", 
 167              r
'id="article_video_(\d+)"', 
 168              r
'id="player_(\d+)"'], 
 171         config_url 
= self
._search
_regex
( 
 172             r
'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"', 
 173             webpage
, 'config url', default
=None) 
 177             player 
= self
._parse
_json
( 
 179                     r
'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s
*:\s
*["\']?\d+.+?})\s*\)', webpage, 
 180                     'player', default='{}'), 
 181                 video_id, transform_source=js_to_json, fatal=False) 
 183                 config_url = url_or_none(player.get('configUrl')) 
 184                 params = player.get('configParams') 
 185                 if isinstance(params, dict): 
 186                     config_params = params 
 189             DEFAULT_SITE_ID = '23000' 
 191                 'tvnoviny': DEFAULT_SITE_ID, 
 192                 'novaplus': DEFAULT_SITE_ID, 
 193                 'vymena': DEFAULT_SITE_ID, 
 194                 'krasna': DEFAULT_SITE_ID, 
 200             site_id = self._search_regex( 
 201                 r'site=(\d+)', webpage, 'site id', default=None) or SITES.get( 
 202                 site, DEFAULT_SITE_ID) 
 204             config_url = 'https://api.nova.cz/bin/player/videojs/config.php' 
 212         config = self._download_json( 
 213             config_url, display_id, 
 214             'Downloading config JSON', query=config_params, 
 215             transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) 
 217         mediafile = config['mediafile'] 
 218         video_url = mediafile['src'] 
 220         m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url) 
 223                 'url': m.group('url'), 
 224                 'app': m.group('app'), 
 225                 'play_path': m.group('playpath'), 
 226                 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf', 
 233         self._sort_formats(formats) 
 235         title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage) 
 236         description = clean_html(self._og_search_description(webpage, default=None)) 
 237         thumbnail = config.get('poster') 
 239         if site == 'novaplus': 
 240             upload_date = unified_strdate(self._search_regex( 
 241                 r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None)) 
 242         elif site == 'fanda': 
 243             upload_date = unified_strdate(self._search_regex( 
 244                 r'<span class="date_time
">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None)) 
 250             'display_id': display_id, 
 252             'description': description, 
 253             'upload_date': upload_date, 
 254             'thumbnail': thumbnail,