1 # -*- coding: utf-8 -*- 
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   8     compat_urllib_parse_unquote
, 
   9     compat_urllib_parse_urlparse
, 
  19 class CeskaTelevizeIE(InfoExtractor
): 
  20     _VALID_URL 
= r
'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$' 
  22         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 
  24             'id': '61924494876951776', 
  26             'title': 'Hyde Park Civilizace', 
  27             'description': 'md5:fe93f6eda372d150759d11644ebbfb4a', 
  28             'thumbnail': 're:^https?://.*\.jpg', 
  33             'skip_download': True, 
  36         'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 
  38             'id': '61924494877028507', 
  40             'title': 'Hyde Park Civilizace: Bonus 01 - En', 
  41             'description': 'English Subtittles', 
  42             'thumbnail': 're:^https?://.*\.jpg', 
  47             'skip_download': True, 
  51         'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 
  55             'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 
  60             'skip_download': True, 
  62         'skip': 'Georestricted to Czech Republic', 
  64         # video with 18+ caution trailer 
  65         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 
  67             'id': '215562210900007-bogotart', 
  68             'title': 'Queer: Bogotart', 
  69             'description': 'Alternativní průvodce současným queer světem', 
  73                 'id': '61924494876844842', 
  75                 'title': 'Queer: Bogotart (Varování 18+)', 
  80                 'id': '61924494877068022', 
  82                 'title': 'Queer: Bogotart (Queer)', 
  83                 'thumbnail': 're:^https?://.*\.jpg', 
  89             'skip_download': True, 
  93     def _real_extract(self
, url
): 
  94         url 
= url
.replace('/porady/', '/ivysilani/').replace('/video/', '') 
  96         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  97         playlist_id 
= mobj
.group('id') 
  99         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 101         NOT_AVAILABLE_STRING 
= 'This content is not available at your territory due to limited copyright.' 
 102         if '%s</p>' % NOT_AVAILABLE_STRING 
in webpage
: 
 103             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
 105         typ 
= self
._html
_search
_regex
( 
 106             r
'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage
, 'type') 
 107         episode_id 
= self
._html
_search
_regex
( 
 108             r
'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage
, 'episode_id') 
 111             'playlist[0][type]': typ
, 
 112             'playlist[0][id]': episode_id
, 
 113             'requestUrl': compat_urllib_parse_urlparse(url
).path
, 
 114             'requestSource': 'iVysilani', 
 117         req 
= sanitized_Request( 
 118             'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', 
 119             data
=urlencode_postdata(data
)) 
 121         req
.add_header('Content-type', 'application/x-www-form-urlencoded') 
 122         req
.add_header('x-addr', '127.0.0.1') 
 123         req
.add_header('X-Requested-With', 'XMLHttpRequest') 
 124         req
.add_header('Referer', url
) 
 126         playlistpage 
= self
._download
_json
(req
, playlist_id
) 
 128         playlist_url 
= playlistpage
['url'] 
 129         if playlist_url 
== 'error_region': 
 130             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
 132         req 
= sanitized_Request(compat_urllib_parse_unquote(playlist_url
)) 
 133         req
.add_header('Referer', url
) 
 135         playlist_title 
= self
._og
_search
_title
(webpage
, default
=None) 
 136         playlist_description 
= self
._og
_search
_description
(webpage
, default
=None) 
 138         playlist 
= self
._download
_json
(req
, playlist_id
)['playlist'] 
 139         playlist_len 
= len(playlist
) 
 142         for item 
in playlist
: 
 143             is_live 
= item
.get('type') == 'LIVE' 
 145             for format_id
, stream_url 
in item
['streamUrls'].items(): 
 146                 formats
.extend(self
._extract
_m
3u8_formats
( 
 147                     stream_url
, playlist_id
, 'mp4', 
 148                     entry_protocol
='m3u8' if is_live 
else 'm3u8_native', 
 150             self
._sort
_formats
(formats
) 
 152             item_id 
= item
.get('id') or item
['assetId'] 
 153             title 
= item
['title'] 
 155             duration 
= float_or_none(item
.get('duration')) 
 156             thumbnail 
= item
.get('previewImageUrl') 
 159             if item
.get('type') == 'VOD': 
 160                 subs 
= item
.get('subtitles') 
 162                     subtitles 
= self
.extract_subtitles(episode_id
, subs
) 
 164             if playlist_len 
== 1: 
 165                 final_title 
= playlist_title 
or title
 
 167                     final_title 
= self
._live
_title
(final_title
) 
 169                 final_title 
= '%s (%s)' % (playlist_title
, title
) 
 173                 'title': final_title
, 
 174                 'description': playlist_description 
if playlist_len 
== 1 else None, 
 175                 'thumbnail': thumbnail
, 
 176                 'duration': duration
, 
 178                 'subtitles': subtitles
, 
 182         return self
.playlist_result(entries
, playlist_id
, playlist_title
, playlist_description
) 
 184     def _get_subtitles(self
, episode_id
, subs
): 
 185         original_subtitles 
= self
._download
_webpage
( 
 186             subs
[0]['url'], episode_id
, 'Downloading subtitles') 
 187         srt_subs 
= self
._fix
_subtitles
(original_subtitles
) 
 196     def _fix_subtitles(subtitles
): 
 197         """ Convert millisecond-based subtitles to SRT """ 
 199         def _msectotimecode(msec
): 
 200             """ Helper utility to convert milliseconds to timecode """ 
 202             for divider 
in [1000, 60, 60, 100]: 
 203                 components
.append(msec 
% divider
) 
 205             return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components
) 
 207         def _fix_subtitle(subtitle
): 
 208             for line 
in subtitle
.splitlines(): 
 209                 m 
= re
.match(r
'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line
) 
 212                     start
, stop 
= (_msectotimecode(int(t
)) for t 
in m
.groups()[1:]) 
 213                     yield '{0} --> {1}'.format(start
, stop
) 
 217         return '\r\n'.join(_fix_subtitle(subtitles
))