1 # -*- coding: utf-8 -*- 
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   9     compat_urllib_parse_unquote
, 
  10     compat_urllib_parse_urlparse
, 
  19 class CeskaTelevizeIE(InfoExtractor
): 
  20     _VALID_URL 
= r
'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$' 
  22         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 
  24             'id': '61924494876951776', 
  26             'title': 'Hyde Park Civilizace', 
  27             'description': 'md5:fe93f6eda372d150759d11644ebbfb4a', 
  28             'thumbnail': 're:^https?://.*\.jpg', 
  33             'skip_download': True, 
  36         'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', 
  38             'id': '61924494876844374', 
  40             'title': 'První republika: Zpěvačka z Dupárny Bobina', 
  41             'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.', 
  42             'thumbnail': 're:^https?://.*\.jpg', 
  47             'skip_download': True, 
  50         # video with 18+ caution trailer 
  51         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 
  53             'id': '215562210900007-bogotart', 
  54             'title': 'Queer: Bogotart', 
  55             'description': 'Alternativní průvodce současným queer světem', 
  59                 'id': '61924494876844842', 
  61                 'title': 'Queer: Bogotart (Varování 18+)', 
  66                 'id': '61924494877068022', 
  68                 'title': 'Queer: Bogotart (Queer)', 
  69                 'thumbnail': 're:^https?://.*\.jpg', 
  75             'skip_download': True, 
  79     def _real_extract(self
, url
): 
  80         url 
= url
.replace('/porady/', '/ivysilani/').replace('/video/', '') 
  82         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  83         playlist_id 
= mobj
.group('id') 
  85         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
  87         NOT_AVAILABLE_STRING 
= 'This content is not available at your territory due to limited copyright.' 
  88         if '%s</p>' % NOT_AVAILABLE_STRING 
in webpage
: 
  89             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
  91         typ 
= self
._html
_search
_regex
( 
  92             r
'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage
, 'type') 
  93         episode_id 
= self
._html
_search
_regex
( 
  94             r
'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage
, 'episode_id') 
  97             'playlist[0][type]': typ
, 
  98             'playlist[0][id]': episode_id
, 
  99             'requestUrl': compat_urllib_parse_urlparse(url
).path
, 
 100             'requestSource': 'iVysilani', 
 103         req 
= sanitized_Request( 
 104             'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', 
 105             data
=compat_urllib_parse
.urlencode(data
)) 
 107         req
.add_header('Content-type', 'application/x-www-form-urlencoded') 
 108         req
.add_header('x-addr', '127.0.0.1') 
 109         req
.add_header('X-Requested-With', 'XMLHttpRequest') 
 110         req
.add_header('Referer', url
) 
 112         playlistpage 
= self
._download
_json
(req
, playlist_id
) 
 114         playlist_url 
= playlistpage
['url'] 
 115         if playlist_url 
== 'error_region': 
 116             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
 118         req 
= sanitized_Request(compat_urllib_parse_unquote(playlist_url
)) 
 119         req
.add_header('Referer', url
) 
 121         playlist_title 
= self
._og
_search
_title
(webpage
) 
 122         playlist_description 
= self
._og
_search
_description
(webpage
) 
 124         playlist 
= self
._download
_json
(req
, playlist_id
)['playlist'] 
 125         playlist_len 
= len(playlist
) 
 128         for item 
in playlist
: 
 130             for format_id
, stream_url 
in item
['streamUrls'].items(): 
 131                 formats
.extend(self
._extract
_m
3u8_formats
( 
 132                     stream_url
, playlist_id
, 'mp4', entry_protocol
='m3u8_native')) 
 133             self
._sort
_formats
(formats
) 
 135             item_id 
= item
.get('id') or item
['assetId'] 
 136             title 
= item
['title'] 
 138             duration 
= float_or_none(item
.get('duration')) 
 139             thumbnail 
= item
.get('previewImageUrl') 
 142             if item
.get('type') == 'VOD': 
 143                 subs 
= item
.get('subtitles') 
 145                     subtitles 
= self
.extract_subtitles(episode_id
, subs
) 
 149                 'title': playlist_title 
if playlist_len 
== 1 else '%s (%s)' % (playlist_title
, title
), 
 150                 'description': playlist_description 
if playlist_len 
== 1 else None, 
 151                 'thumbnail': thumbnail
, 
 152                 'duration': duration
, 
 154                 'subtitles': subtitles
, 
 157         return self
.playlist_result(entries
, playlist_id
, playlist_title
, playlist_description
) 
 159     def _get_subtitles(self
, episode_id
, subs
): 
 160         original_subtitles 
= self
._download
_webpage
( 
 161             subs
[0]['url'], episode_id
, 'Downloading subtitles') 
 162         srt_subs 
= self
._fix
_subtitles
(original_subtitles
) 
 171     def _fix_subtitles(subtitles
): 
 172         """ Convert millisecond-based subtitles to SRT """ 
 174         def _msectotimecode(msec
): 
 175             """ Helper utility to convert milliseconds to timecode """ 
 177             for divider 
in [1000, 60, 60, 100]: 
 178                 components
.append(msec 
% divider
) 
 180             return "{3:02}:{2:02}:{1:02},{0:03}".format(*components
) 
 182         def _fix_subtitle(subtitle
): 
 183             for line 
in subtitle
.splitlines(): 
 184                 m 
= re
.match(r
"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line
) 
 187                     start
, stop 
= (_msectotimecode(int(t
)) for t 
in m
.groups()[1:]) 
 188                     yield "{0} --> {1}".format(start
, stop
) 
 192         return "\r\n".join(_fix_subtitle(subtitles
))