1 # -*- coding: utf-8 -*- 
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  10     compat_urllib_parse_urlparse
, 
  15 class CeskaTelevizeIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' 
  20             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka', 
  22                 'id': '213512120230004', 
  24                 'title': 'První republika: Španělská chřipka', 
  28                 'skip_download': True,  # requires rtmpdump 
  30             'skip': 'Works only from Czech Republic.', 
  33             'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt', 
  37                 'title': 'Tsatsiki, maminka a policajt', 
  41                 'skip_download': True,  # requires rtmpdump 
  43             'skip': 'Works only from Czech Republic.', 
  46             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', 
  50                 'title': 'První republika: Zpěvačka z Dupárny Bobina', 
  54                 'skip_download': True,  # requires rtmpdump 
  59     def _real_extract(self
, url
): 
  60         url 
= url
.replace('/porady/', '/ivysilani/').replace('/video/', '') 
  62         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  63         video_id 
= mobj
.group('id') 
  65         webpage 
= self
._download
_webpage
(url
, video_id
) 
  67         NOT_AVAILABLE_STRING 
= 'This content is not available at your territory due to limited copyright.' 
  68         if '%s</p>' % NOT_AVAILABLE_STRING 
in webpage
: 
  69             raise ExtractorError(NOT_AVAILABLE_STRING
, expected
=True) 
  71         typ 
= self
._html
_search
_regex
(r
'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage
, 'type') 
  72         episode_id 
= self
._html
_search
_regex
(r
'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage
, 'episode_id') 
  75             'playlist[0][type]': typ
, 
  76             'playlist[0][id]': episode_id
, 
  77             'requestUrl': compat_urllib_parse_urlparse(url
).path
, 
  78             'requestSource': 'iVysilani', 
  81         req 
= compat_urllib_request
.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url', 
  82                                             data
=compat_urllib_parse
.urlencode(data
)) 
  84         req
.add_header('Content-type', 'application/x-www-form-urlencoded') 
  85         req
.add_header('x-addr', '127.0.0.1') 
  86         req
.add_header('X-Requested-With', 'XMLHttpRequest') 
  87         req
.add_header('Referer', url
) 
  89         playlistpage 
= self
._download
_json
(req
, video_id
) 
  91         req 
= compat_urllib_request
.Request(compat_urllib_parse
.unquote(playlistpage
['url'])) 
  92         req
.add_header('Referer', url
) 
  94         playlist 
= self
._download
_xml
(req
, video_id
) 
  97         for i 
in playlist
.find('smilRoot/body'): 
  98             if 'AD' not in i
.attrib
['id']: 
  99                 base_url 
= i
.attrib
['base'] 
 100                 parsedurl 
= compat_urllib_parse_urlparse(base_url
) 
 101                 duration 
= i
.attrib
['duration'] 
 103                 for video 
in i
.findall('video'): 
 104                     if video
.attrib
['label'] != 'AD': 
 105                         format_id 
= video
.attrib
['label'] 
 106                         play_path 
= video
.attrib
['src'] 
 107                         vbr 
= int(video
.attrib
['system-bitrate']) 
 110                             'format_id': format_id
, 
 113                             'play_path': play_path
, 
 114                             'app': parsedurl
.path
[1:] + '?' + parsedurl
.query
, 
 119         self
._sort
_formats
(formats
) 
 123             'title': self
._html
_search
_regex
(r
'<title>(.+?) — iVysílání — Česká televize</title>', webpage
, 'title'), 
 124             'duration': float(duration
),