2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   8     compat_urllib_parse_unquote
, 
   9     compat_urllib_parse_urlparse
, 
  22 class CeskaTelevizeIE(InfoExtractor
): 
  23     _VALID_URL 
= r
'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' 
  25         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 
  27             'id': '61924494877246241', 
  29             'title': 'Hyde Park Civilizace: Život v Grónsku', 
  30             'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', 
  31             'thumbnail': r
're:^https?://.*\.jpg', 
  36             'skip_download': True, 
  39         'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 
  41             'id': '61924494877028507', 
  43             'title': 'Hyde Park Civilizace: Bonus 01 - En', 
  44             'description': 'English Subtittles', 
  45             'thumbnail': r
're:^https?://.*\.jpg', 
  50             'skip_download': True, 
  54         'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 
  58             'title': r
're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 
  63             'skip_download': True, 
  65         'skip': 'Georestricted to Czech Republic', 
  67         'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', 
  68         'only_matching': True, 
  71     def _real_extract(self
, url
): 
  72         playlist_id 
= self
._match
_id
(url
) 
  74         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
  76         NOT_AVAILABLE_STRING 
= 'This content is not available at your territory due to limited copyright.' 
  77         if '%s</p>' % NOT_AVAILABLE_STRING 
in webpage
: 
  78             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
  83         playlist 
= self
._parse
_json
( 
  85                 r
'getPlaylistUrl\(\[({.+?})\]', webpage
, 'playlist', 
  86                 default
='{}'), playlist_id
) 
  88             type_ 
= playlist
.get('type') 
  89             episode_id 
= playlist
.get('id') 
  92             type_ 
= self
._html
_search
_regex
( 
  93                 r
'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', 
  96             episode_id 
= self
._html
_search
_regex
( 
  97                 r
'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', 
  98                 webpage
, 'episode_id') 
 101             'playlist[0][type]': type_
, 
 102             'playlist[0][id]': episode_id
, 
 103             'requestUrl': compat_urllib_parse_urlparse(url
).path
, 
 104             'requestSource': 'iVysilani', 
 109         for user_agent 
in (None, USER_AGENTS
['Safari']): 
 110             req 
= sanitized_Request( 
 111                 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', 
 112                 data
=urlencode_postdata(data
)) 
 114             req
.add_header('Content-type', 'application/x-www-form-urlencoded') 
 115             req
.add_header('x-addr', '127.0.0.1') 
 116             req
.add_header('X-Requested-With', 'XMLHttpRequest') 
 118                 req
.add_header('User-Agent', user_agent
) 
 119             req
.add_header('Referer', url
) 
 121             playlistpage 
= self
._download
_json
(req
, playlist_id
, fatal
=False) 
 126             playlist_url 
= playlistpage
['url'] 
 127             if playlist_url 
== 'error_region': 
 128                 raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
 130             req 
= sanitized_Request(compat_urllib_parse_unquote(playlist_url
)) 
 131             req
.add_header('Referer', url
) 
 133             playlist_title 
= self
._og
_search
_title
(webpage
, default
=None) 
 134             playlist_description 
= self
._og
_search
_description
(webpage
, default
=None) 
 136             playlist 
= self
._download
_json
(req
, playlist_id
, fatal
=False) 
 140             playlist 
= playlist
.get('playlist') 
 141             if not isinstance(playlist
, list): 
 144             playlist_len 
= len(playlist
) 
 146             for num
, item 
in enumerate(playlist
): 
 147                 is_live 
= item
.get('type') == 'LIVE' 
 149                 for format_id
, stream_url 
in item
.get('streamUrls', {}).items(): 
 150                     if 'drmOnly=true' in stream_url
: 
 152                     if 'playerType=flash' in stream_url
: 
 153                         stream_formats 
= self
._extract
_m
3u8_formats
( 
 154                             stream_url
, playlist_id
, 'mp4', 'm3u8_native', 
 155                             m3u8_id
='hls-%s' % format_id
, fatal
=False) 
 157                         stream_formats 
= self
._extract
_mpd
_formats
( 
 158                             stream_url
, playlist_id
, 
 159                             mpd_id
='dash-%s' % format_id
, fatal
=False) 
 160                     # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031 
 161                     if format_id 
== 'audioDescription': 
 162                         for f 
in stream_formats
: 
 163                             f
['source_preference'] = -10 
 164                     formats
.extend(stream_formats
) 
 166                 if user_agent 
and len(entries
) == playlist_len
: 
 167                     entries
[num
]['formats'].extend(formats
) 
 170                 item_id 
= item
.get('id') or item
['assetId'] 
 171                 title 
= item
['title'] 
 173                 duration 
= float_or_none(item
.get('duration')) 
 174                 thumbnail 
= item
.get('previewImageUrl') 
 177                 if item
.get('type') == 'VOD': 
 178                     subs 
= item
.get('subtitles') 
 180                         subtitles 
= self
.extract_subtitles(episode_id
, subs
) 
 182                 if playlist_len 
== 1: 
 183                     final_title 
= playlist_title 
or title
 
 185                         final_title 
= self
._live
_title
(final_title
) 
 187                     final_title 
= '%s (%s)' % (playlist_title
, title
) 
 191                     'title': final_title
, 
 192                     'description': playlist_description 
if playlist_len 
== 1 else None, 
 193                     'thumbnail': thumbnail
, 
 194                     'duration': duration
, 
 196                     'subtitles': subtitles
, 
 201             self
._sort
_formats
(e
['formats']) 
 203         return self
.playlist_result(entries
, playlist_id
, playlist_title
, playlist_description
) 
 205     def _get_subtitles(self
, episode_id
, subs
): 
 206         original_subtitles 
= self
._download
_webpage
( 
 207             subs
[0]['url'], episode_id
, 'Downloading subtitles') 
 208         srt_subs 
= self
._fix
_subtitles
(original_subtitles
) 
 217     def _fix_subtitles(subtitles
): 
 218         """ Convert millisecond-based subtitles to SRT """ 
 220         def _msectotimecode(msec
): 
 221             """ Helper utility to convert milliseconds to timecode """ 
 223             for divider 
in [1000, 60, 60, 100]: 
 224                 components
.append(msec 
% divider
) 
 226             return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components
) 
 228         def _fix_subtitle(subtitle
): 
 229             for line 
in subtitle
.splitlines(): 
 230                 m 
= re
.match(r
'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line
) 
 233                     start
, stop 
= (_msectotimecode(int(t
)) for t 
in m
.groups()[1:]) 
 234                     yield '{0} --> {1}'.format(start
, stop
) 
 238         return '\r\n'.join(_fix_subtitle(subtitles
)) 
 241 class CeskaTelevizePoradyIE(InfoExtractor
): 
 242     _VALID_URL 
= r
'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' 
 244         # video with 18+ caution trailer 
 245         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 
 247             'id': '215562210900007-bogotart', 
 248             'title': 'Queer: Bogotart', 
 249             'description': 'Alternativní průvodce současným queer světem', 
 253                 'id': '61924494876844842', 
 255                 'title': 'Queer: Bogotart (Varování 18+)', 
 260                 'id': '61924494877068022', 
 262                 'title': 'Queer: Bogotart (Queer)', 
 263                 'thumbnail': r
're:^https?://.*\.jpg', 
 269             'skip_download': True, 
 273         'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', 
 274         'only_matching': True, 
 277     def _real_extract(self
, url
): 
 278         video_id 
= self
._match
_id
(url
) 
 280         webpage 
= self
._download
_webpage
(url
, video_id
) 
 282         data_url 
= update_url_query(unescapeHTML(self
._search
_regex
( 
 283             (r
'<span[^>]*\bdata-url=(["\'])(?P
<url
>(?
:(?
!\
1).)+)\
1', 
 284              r'<iframe
[^
>]+\bsrc
=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), 
 285             webpage, 'iframe player url', group='url')), query={ 
 289         return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())