]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvp.py
5070082da7ba3b34078a01bd214d02a9e8dcac33
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  10     get_element_by_attribute
, 
  15 class TVPIE(InfoExtractor
): 
  17     IE_DESC 
= 'Telewizja Polska' 
  18     _VALID_URL 
= r
'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)' 
  21         'url': 'http://vod.tvp.pl/194536/i-seria-odc-13', 
  22         'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 
  26             'title': 'Czas honoru, I seria – odc. 13', 
  29         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', 
  30         'md5': 'b0005b542e5b4de643a9690326ab1257', 
  34             'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', 
  37         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 
  38         'only_matching': True, 
  40         'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200', 
  41         'only_matching': True, 
  43         'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa', 
  44         'only_matching': True, 
  46         'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach', 
  47         'only_matching': True, 
  49         'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum', 
  50         'only_matching': True, 
  52         'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji', 
  53         'only_matching': True, 
  56     def _real_extract(self
, url
): 
  57         video_id 
= self
._match
_id
(url
) 
  59         webpage 
= self
._download
_webpage
( 
  60             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id
, video_id
) 
  62         error_massage 
= get_element_by_attribute('class', 'msg error', webpage
) 
  64             raise ExtractorError('%s said: %s' % ( 
  65                 self
.IE_NAME
, clean_html(error_massage
)), expected
=True) 
  67         title 
= self
._search
_regex
( 
  68             r
'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', 
  69             webpage, 'title', group='title') 
  70         series_title = self._search_regex( 
  71             r'name\s*:\s*([\'"])SeriesTitle\
1\s
*,\s
*value\s
*:\s
*\
1(?P
<series
>.+?
)\
1', 
  72             webpage, 'series
', group='series
', default=None) 
  74             title = '%s, %s' % (series_title, title) 
  76         thumbnail = self._search_regex( 
  77             r"poster\s*:\s*'([^
']+)'", webpage, 'thumbnail', default=None) 
  79         video_url = self._search_regex( 
  80             r'0:{src:([\'"])(?P
<url
>.*?
)\
1', webpage, 
  81             'formats
', group='url
', default=None) 
  82         if not video_url or 'material_niedostepny
.mp4
' in video_url: 
  83             video_url = self._download_json( 
  84                 'http
://www
.tvp
.pl
/pub
/stat
/videofileinfo?video_id
=%s' % video_id, 
  85                 video_id)['video_url
'] 
  88         video_url_base = self._search_regex( 
  89             r'(https?
://.+?
/video
)(?
:\
.(?
:ism|f4m|m3u8
)|
-\d
+\
.mp4
)', 
  90             video_url, 'video base url
', default=None) 
  92             # TODO: Current DASH formats are broken - $Time$ pattern in 
  93             # <SegmentTemplate> not implemented yet 
  94             # formats.extend(self._extract_mpd_formats( 
  95             #     video_url_base + '.ism
/video
.mpd
', 
  96             #     video_id, mpd_id='dash
', fatal=False)) 
  97             formats.extend(self._extract_f4m_formats( 
  98                 video_url_base + '.ism
/video
.f4m
', 
  99                 video_id, f4m_id='hds
', fatal=False)) 
 100             m3u8_formats = self._extract_m3u8_formats( 
 101                 video_url_base + '.ism
/video
.m3u8
', video_id, 
 102                 'mp4
', 'm3u8_native
', m3u8_id='hls
', fatal=False) 
 103             self._sort_formats(m3u8_formats) 
 104             m3u8_formats = list(filter( 
 105                 lambda f: f.get('vcodec
') != 'none
' and f.get('resolution
') != 'multiple
', 
 107             formats.extend(m3u8_formats) 
 108             for i, m3u8_format in enumerate(m3u8_formats, 2): 
 109                 http_url = '%s-%d.mp4
' % (video_url_base, i) 
 110                 if self._is_valid_url(http_url, video_id): 
 111                     f = m3u8_format.copy() 
 114                         'format_id
': f['format_id
'].replace('hls
', 'http
'), 
 120                 'format_id
': 'direct
', 
 122                 'ext
': determine_ext(video_url, 'mp4
'), 
 125         self._sort_formats(formats) 
 130             'thumbnail
': thumbnail, 
 135 class TVPSeriesIE(InfoExtractor): 
 136     IE_NAME = 'tvp
:series
' 
 137     _VALID_URL = r'https?
://vod\
.tvp\
.pl
/(?
:[^
/]+/){2}
(?P
<id>[^
/]+)/?$
' 
 140         'url
': 'http
://vod
.tvp
.pl
/filmy
-fabularne
/filmy
-za
-darmo
/ogniem
-i
-mieczem
', 
 142             'title
': 'Ogniem i mieczem
', 
 147         'url
': 'http
://vod
.tvp
.pl
/audycje
/podroze
/boso
-przez
-swiat
', 
 149             'title
': 'Boso przez świat
', 
 152         'playlist_count
': 86, 
 155     def _real_extract(self, url): 
 156         display_id = self._match_id(url) 
 157         webpage = self._download_webpage(url, display_id, tries=5) 
 159         title = self._html_search_regex( 
 160             r'(?s
) id=[\'"]path[\'"]>(?
:.*? 
/ ){2}
(.*?
)</span
>', webpage, 'series
') 
 161         playlist_id = self._search_regex(r'nodeId
:\s
*(\d
+)', webpage, 'playlist 
id') 
 162         playlist = self._download_webpage( 
 163             'http
://vod
.tvp
.pl
/vod
/seriesAjax?
type=series
&nodeId
=%s&recommend
' 
 164             'edId
=0&sort
=&page
=0&pageSize
=10000' % playlist_id, display_id, tries=5, 
 165             note='Downloading playlist
') 
 167         videos_paths = re.findall( 
 168             '(?s
)class="shortTitle">.*?href
="(/[^"]+)', playlist) 
 170             self.url_result('http
://vod
.tvp
.pl
%s' % v_path, ie=TVPIE.ie_key()) 
 171             for v_path in videos_paths] 
 176             'display_id
': display_id,