2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   8     compat_urllib_parse_unquote
, 
   9     compat_urllib_parse_urlparse
, 
  21 class CeskaTelevizeIE(InfoExtractor
): 
  22     _VALID_URL 
= r
'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' 
  24         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 
  26             'id': '61924494877246241', 
  28             'title': 'Hyde Park Civilizace: Život v Grónsku', 
  29             'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', 
  30             'thumbnail': r
're:^https?://.*\.jpg', 
  35             'skip_download': True, 
  38         'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 
  40             'id': '61924494877028507', 
  42             'title': 'Hyde Park Civilizace: Bonus 01 - En', 
  43             'description': 'English Subtittles', 
  44             'thumbnail': r
're:^https?://.*\.jpg', 
  49             'skip_download': True, 
  53         'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 
  57             'title': r
're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 
  62             'skip_download': True, 
  64         'skip': 'Georestricted to Czech Republic', 
  66         'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', 
  67         'only_matching': True, 
  70     def _real_extract(self
, url
): 
  71         playlist_id 
= self
._match
_id
(url
) 
  73         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
  75         NOT_AVAILABLE_STRING 
= 'This content is not available at your territory due to limited copyright.' 
  76         if '%s</p>' % NOT_AVAILABLE_STRING 
in webpage
: 
  77             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
  82         playlist 
= self
._parse
_json
( 
  84                 r
'getPlaylistUrl\(\[({.+?})\]', webpage
, 'playlist', 
  85                 default
='{}'), playlist_id
) 
  87             type_ 
= playlist
.get('type') 
  88             episode_id 
= playlist
.get('id') 
  91             type_ 
= self
._html
_search
_regex
( 
  92                 r
'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', 
  95             episode_id 
= self
._html
_search
_regex
( 
  96                 r
'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', 
  97                 webpage
, 'episode_id') 
 100             'playlist[0][type]': type_
, 
 101             'playlist[0][id]': episode_id
, 
 102             'requestUrl': compat_urllib_parse_urlparse(url
).path
, 
 103             'requestSource': 'iVysilani', 
 108         for user_agent 
in (None, USER_AGENTS
['Safari']): 
 109             req 
= sanitized_Request( 
 110                 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', 
 111                 data
=urlencode_postdata(data
)) 
 113             req
.add_header('Content-type', 'application/x-www-form-urlencoded') 
 114             req
.add_header('x-addr', '127.0.0.1') 
 115             req
.add_header('X-Requested-With', 'XMLHttpRequest') 
 117                 req
.add_header('User-Agent', user_agent
) 
 118             req
.add_header('Referer', url
) 
 120             playlistpage 
= self
._download
_json
(req
, playlist_id
, fatal
=False) 
 125             playlist_url 
= playlistpage
['url'] 
 126             if playlist_url 
== 'error_region': 
 127                 raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
 129             req 
= sanitized_Request(compat_urllib_parse_unquote(playlist_url
)) 
 130             req
.add_header('Referer', url
) 
 132             playlist_title 
= self
._og
_search
_title
(webpage
, default
=None) 
 133             playlist_description 
= self
._og
_search
_description
(webpage
, default
=None) 
 135             playlist 
= self
._download
_json
(req
, playlist_id
, fatal
=False) 
 139             playlist 
= playlist
.get('playlist') 
 140             if not isinstance(playlist
, list): 
 143             playlist_len 
= len(playlist
) 
 145             for num
, item 
in enumerate(playlist
): 
 146                 is_live 
= item
.get('type') == 'LIVE' 
 148                 for format_id
, stream_url 
in item
.get('streamUrls', {}).items(): 
 149                     if 'playerType=flash' in stream_url
: 
 150                         stream_formats 
= self
._extract
_m
3u8_formats
( 
 151                             stream_url
, playlist_id
, 'mp4', 'm3u8_native', 
 152                             m3u8_id
='hls-%s' % format_id
, fatal
=False) 
 154                         stream_formats 
= self
._extract
_mpd
_formats
( 
 155                             stream_url
, playlist_id
, 
 156                             mpd_id
='dash-%s' % format_id
, fatal
=False) 
 157                     # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031 
 158                     if format_id 
== 'audioDescription': 
 159                         for f 
in stream_formats
: 
 160                             f
['source_preference'] = -10 
 161                     formats
.extend(stream_formats
) 
 163                 if user_agent 
and len(entries
) == playlist_len
: 
 164                     entries
[num
]['formats'].extend(formats
) 
 167                 item_id 
= item
.get('id') or item
['assetId'] 
 168                 title 
= item
['title'] 
 170                 duration 
= float_or_none(item
.get('duration')) 
 171                 thumbnail 
= item
.get('previewImageUrl') 
 174                 if item
.get('type') == 'VOD': 
 175                     subs 
= item
.get('subtitles') 
 177                         subtitles 
= self
.extract_subtitles(episode_id
, subs
) 
 179                 if playlist_len 
== 1: 
 180                     final_title 
= playlist_title 
or title
 
 182                         final_title 
= self
._live
_title
(final_title
) 
 184                     final_title 
= '%s (%s)' % (playlist_title
, title
) 
 188                     'title': final_title
, 
 189                     'description': playlist_description 
if playlist_len 
== 1 else None, 
 190                     'thumbnail': thumbnail
, 
 191                     'duration': duration
, 
 193                     'subtitles': subtitles
, 
 198             self
._sort
_formats
(e
['formats']) 
 200         return self
.playlist_result(entries
, playlist_id
, playlist_title
, playlist_description
) 
 202     def _get_subtitles(self
, episode_id
, subs
): 
 203         original_subtitles 
= self
._download
_webpage
( 
 204             subs
[0]['url'], episode_id
, 'Downloading subtitles') 
 205         srt_subs 
= self
._fix
_subtitles
(original_subtitles
) 
 214     def _fix_subtitles(subtitles
): 
 215         """ Convert millisecond-based subtitles to SRT """ 
 217         def _msectotimecode(msec
): 
 218             """ Helper utility to convert milliseconds to timecode """ 
 220             for divider 
in [1000, 60, 60, 100]: 
 221                 components
.append(msec 
% divider
) 
 223             return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components
) 
 225         def _fix_subtitle(subtitle
): 
 226             for line 
in subtitle
.splitlines(): 
 227                 m 
= re
.match(r
'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line
) 
 230                     start
, stop 
= (_msectotimecode(int(t
)) for t 
in m
.groups()[1:]) 
 231                     yield '{0} --> {1}'.format(start
, stop
) 
 235         return '\r\n'.join(_fix_subtitle(subtitles
)) 
 238 class CeskaTelevizePoradyIE(InfoExtractor
): 
 239     _VALID_URL 
= r
'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' 
 241         # video with 18+ caution trailer 
 242         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 
 244             'id': '215562210900007-bogotart', 
 245             'title': 'Queer: Bogotart', 
 246             'description': 'Alternativní průvodce současným queer světem', 
 250                 'id': '61924494876844842', 
 252                 'title': 'Queer: Bogotart (Varování 18+)', 
 257                 'id': '61924494877068022', 
 259                 'title': 'Queer: Bogotart (Queer)', 
 260                 'thumbnail': r
're:^https?://.*\.jpg', 
 266             'skip_download': True, 
 270     def _real_extract(self
, url
): 
 271         video_id 
= self
._match
_id
(url
) 
 273         webpage 
= self
._download
_webpage
(url
, video_id
) 
 275         data_url 
= unescapeHTML(self
._search
_regex
( 
 276             r
'<span[^>]*\bdata-url=(["\'])(?P
<url
>(?
:(?
!\
1).)+)\
1', 
 277             webpage, 'iframe player url
', group='url
')) 
 279         return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())