1 # -*- coding: utf-8 -*- 
   2 from __future__ 
import unicode_literals
 
   6 from .subtitles 
import SubtitlesInfoExtractor
 
  10     compat_urllib_parse_urlparse
, 
  18 class CeskaTelevizeIE(SubtitlesInfoExtractor
): 
  19     _VALID_URL 
= r
'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' 
  23             'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 
  25                 'id': '214411058091220', 
  27                 'title': 'Hyde Park Civilizace', 
  28                 'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře', 
  29                 'thumbnail': 're:^https?://.*\.jpg', 
  34                 'skip_download': True, 
  38             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', 
  42                 'title': 'První republika: Zpěvačka z Dupárny Bobina', 
  43                 'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.', 
  44                 'thumbnail': 're:^https?://.*\.jpg', 
  49                 'skip_download': True, 
  54     def _real_extract(self
, url
): 
  55         url 
= url
.replace('/porady/', '/ivysilani/').replace('/video/', '') 
  57         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  58         video_id 
= mobj
.group('id') 
  60         webpage 
= self
._download
_webpage
(url
, video_id
) 
  62         NOT_AVAILABLE_STRING 
= 'This content is not available at your territory due to limited copyright.' 
  63         if '%s</p>' % NOT_AVAILABLE_STRING 
in webpage
: 
  64             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
  66         typ 
= self
._html
_search
_regex
(r
'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage
, 'type') 
  67         episode_id 
= self
._html
_search
_regex
(r
'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage
, 'episode_id') 
  70             'playlist[0][type]': typ
, 
  71             'playlist[0][id]': episode_id
, 
  72             'requestUrl': compat_urllib_parse_urlparse(url
).path
, 
  73             'requestSource': 'iVysilani', 
  76         req 
= compat_urllib_request
.Request( 
  77             'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', 
  78             data
=compat_urllib_parse
.urlencode(data
)) 
  80         req
.add_header('Content-type', 'application/x-www-form-urlencoded') 
  81         req
.add_header('x-addr', '127.0.0.1') 
  82         req
.add_header('X-Requested-With', 'XMLHttpRequest') 
  83         req
.add_header('Referer', url
) 
  85         playlistpage 
= self
._download
_json
(req
, video_id
) 
  87         playlist_url 
= playlistpage
['url'] 
  88         if playlist_url 
== 'error_region': 
  89             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
  91         req 
= compat_urllib_request
.Request(compat_urllib_parse
.unquote(playlist_url
)) 
  92         req
.add_header('Referer', url
) 
  94         playlist 
= self
._download
_json
(req
, video_id
) 
  96         item 
= playlist
['playlist'][0] 
  98         for format_id
, stream_url 
in item
['streamUrls'].items(): 
  99             formats
.extend(self
._extract
_m
3u8_formats
(stream_url
, video_id
, 'mp4')) 
 100         self
._sort
_formats
(formats
) 
 102         title 
= self
._og
_search
_title
(webpage
) 
 103         description 
= self
._og
_search
_description
(webpage
) 
 104         duration 
= float_or_none(item
.get('duration')) 
 105         thumbnail 
= item
.get('previewImageUrl') 
 108         subs 
= item
.get('subtitles') 
 110             subtitles
['cs'] = subs
[0]['url'] 
 112         if self
._downloader
.params
.get('listsubtitles', False): 
 113             self
._list
_available
_subtitles
(video_id
, subtitles
) 
 116         subtitles 
= self
._fix
_subtitles
(self
.extract_subtitles(video_id
, subtitles
)) 
 121             'description': description
, 
 122             'thumbnail': thumbnail
, 
 123             'duration': duration
, 
 125             'subtitles': subtitles
, 
 129     def _fix_subtitles(subtitles
): 
 130         """ Convert millisecond-based subtitles to SRT """ 
 131         if subtitles 
is None: 
 132             return subtitles  
# subtitles not requested 
 134         def _msectotimecode(msec
): 
 135             """ Helper utility to convert milliseconds to timecode """ 
 137             for divider 
in [1000, 60, 60, 100]: 
 138                 components
.append(msec 
% divider
) 
 140             return "{3:02}:{2:02}:{1:02},{0:03}".format(*components
) 
 142         def _fix_subtitle(subtitle
): 
 143             for line 
in subtitle
.splitlines(): 
 144                 m 
= re
.match(r
"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line
) 
 147                     start
, stop 
= (_msectotimecode(int(t
)) for t 
in m
.groups()[1:]) 
 148                     yield "{0} --> {1}".format(start
, stop
) 
 153         for k
, v 
in subtitles
.items(): 
 154             fixed_subtitles
[k
] = "\r\n".join(_fix_subtitle(v
)) 
 155         return fixed_subtitles