]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvp.py
   1 # -*- coding: utf-8 -*- 
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   9 class TvpIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$' 
  14         'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035', 
  15         'md5': 'cdd98303338b8a7f7abab5cd14092bf2', 
  19             'title': 'Ogniem i mieczem, odc. 2', 
  22         'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536', 
  23         'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 
  27             'title': 'Czas honoru, I seria – odc. 13', 
  30         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', 
  31         'md5': 'c3b15ed1af288131115ff17a17c19dda', 
  35             'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', 
  38         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 
  39         'md5': 'c3b15ed1af288131115ff17a17c19dda', 
  43             'title': 'Na sygnale, odc. 39', 
  47     def _real_extract(self
, url
): 
  48         video_id 
= self
._match
_id
(url
) 
  50         webpage 
= self
._download
_webpage
( 
  51             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id
, video_id
) 
  53         title 
= self
._search
_regex
( 
  54             r
'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', 
  55             webpage, 'title', group='title') 
  56         series_title = self._search_regex( 
  57             r'name\s*:\s*([\'"])SeriesTitle\
1\s
*,\s
*value\s
*:\s
*\
1(?P
<series
>.+?
)\
1', 
  58             webpage, 'series
', group='series
', default=None) 
  60             title = '%s, %s' % (series_title, title) 
  62         thumbnail = self._search_regex( 
  63             r"poster\s*:\s*'([^
']+)'", webpage, 'thumbnail', default=None) 
  65         video_url = self._search_regex( 
  66             r'0:{src:([\'"])(?P
<url
>.*?
)\
1', webpage, 'formats
', group='url
', default=None) 
  68             video_url = self._download_json( 
  69                 'http
://www
.tvp
.pl
/pub
/stat
/videofileinfo?video_id
=%s' % video_id, 
  70                 video_id)['video_url
'] 
  72         ext = video_url.rsplit('.', 1)[-1] 
  73         if ext != 'ism
/manifest
': 
  77                 'format_id
': 'direct
', 
  82             m3u8_url = re.sub('([^
/]*)\
.ism
/manifest
', r'\
1.ism
/\
1.m3u8
', video_url) 
  83             formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4
') 
  85         self._sort_formats(formats) 
  90             'thumbnail
': thumbnail, 
  95 class TvpSeriesIE(InfoExtractor): 
  96     IE_NAME = 'tvp
.pl
:Series
' 
  97     _VALID_URL = r'https?
://vod\
.tvp\
.pl
/(?
:[^
/]+/){2}
(?P
<id>[^
/]+)/?$
' 
 100         'url
': 'http
://vod
.tvp
.pl
/filmy
-fabularne
/filmy
-za
-darmo
/ogniem
-i
-mieczem
', 
 102             'title
': 'Ogniem i mieczem
', 
 107         'url
': 'http
://vod
.tvp
.pl
/audycje
/podroze
/boso
-przez
-swiat
', 
 109             'title
': 'Boso przez świat
', 
 112         'playlist_count
': 86, 
 115     def _real_extract(self, url): 
 116         display_id = self._match_id(url) 
 117         webpage = self._download_webpage(url, display_id, tries=5) 
 119         title = self._html_search_regex( 
 120             r'(?s
) id=[\'"]path[\'"]>(?
:.*? 
/ ){2}
(.*?
)</span
>', webpage, 'series
') 
 121         playlist_id = self._search_regex(r'nodeId
:\s
*(\d
+)', webpage, 'playlist 
id') 
 122         playlist = self._download_webpage( 
 123             'http
://vod
.tvp
.pl
/vod
/seriesAjax?
type=series
&nodeId
=%s&recommend
' 
 124             'edId
=0&sort
=&page
=0&pageSize
=10000' % playlist_id, display_id, tries=5, 
 125             note='Downloading playlist
') 
 127         videos_paths = re.findall( 
 128             '(?s
)class="shortTitle">.*?href
="(/[^"]+)', playlist) 
 130             self.url_result('http
://vod
.tvp
.pl
%s' % v_path, ie=TvpIE.ie_key()) 
 131             for v_path in videos_paths] 
 136             'display_id
': display_id,