]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvp.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  10     get_element_by_attribute
, 
  15 class TVPIE(InfoExtractor
): 
  17     IE_DESC 
= 'Telewizja Polska' 
  18     _VALID_URL 
= r
'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)' 
  21         'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536', 
  22         'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 
  26             'title': 'Czas honoru, I seria – odc. 13', 
  27             'description': 'md5:381afa5bca72655fe94b05cfe82bf53d', 
  30         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', 
  31         'md5': 'b0005b542e5b4de643a9690326ab1257', 
  35             'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', 
  36             'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', 
  39         # page id is not the same as video id(#7799) 
  40         'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930', 
  41         'md5': '84cd3c8aec4840046e5ab712416b73d0', 
  45             'title': 'Wiadomości, 28.09.2017, 19:30', 
  46             'description': 'Wydanie główne codziennego serwisu informacyjnego.' 
  49         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 
  50         'only_matching': True, 
  52         'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200', 
  53         'only_matching': True, 
  55         'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa', 
  56         'only_matching': True, 
  58         'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach', 
  59         'only_matching': True, 
  61         'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum', 
  62         'only_matching': True, 
  64         'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji', 
  65         'only_matching': True, 
  68     def _real_extract(self
, url
): 
  69         page_id 
= self
._match
_id
(url
) 
  70         webpage 
= self
._download
_webpage
(url
, page_id
) 
  71         video_id 
= self
._search
_regex
([ 
  72             r
'<iframe[^>]+src="[^"]*?object_id=(\d+)', 
  73             r
"object_id\s*:\s*'(\d+)'", 
  74             r
'data-video-id="(\d+)"'], webpage
, 'video id', default
=page_id
) 
  76             '_type': 'url_transparent', 
  77             'url': 'tvp:' + video_id
, 
  78             'description': self
._og
_search
_description
(webpage
, default
=None), 
  79             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
  84 class TVPEmbedIE(InfoExtractor
): 
  86     IE_DESC 
= 'Telewizja Polska' 
  87     _VALID_URL 
= r
'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)' 
  90         'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 
  91         'md5': '8c9cd59d16edabf39331f93bf8a766c7', 
  95             'title': 'Panorama, 07.12.2015, 15:40', 
  98         'url': 'tvp:22670268', 
  99         'only_matching': True, 
 102     def _real_extract(self
, url
): 
 103         video_id 
= self
._match
_id
(url
) 
 105         webpage 
= self
._download
_webpage
( 
 106             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id
, video_id
) 
 108         error_massage 
= get_element_by_attribute('class', 'msg error', webpage
) 
 110             raise ExtractorError('%s said: %s' % ( 
 111                 self
.IE_NAME
, clean_html(error_massage
)), expected
=True) 
 113         title 
= self
._search
_regex
( 
 114             r
'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', 
 115             webpage, 'title', group='title') 
 116         series_title = self._search_regex( 
 117             r'name\s*:\s*([\'"])SeriesTitle\
1\s
*,\s
*value\s
*:\s
*\
1(?P
<series
>.+?
)\
1', 
 118             webpage, 'series
', group='series
', default=None) 
 120             title = '%s, %s' % (series_title, title) 
 122         thumbnail = self._search_regex( 
 123             r"poster\s*:\s*'([^
']+)'", webpage, 'thumbnail', default=None) 
 125         video_url = self._search_regex( 
 126             r'0:{src:([\'"])(?P
<url
>.*?
)\
1', webpage, 
 127             'formats
', group='url
', default=None) 
 128         if not video_url or 'material_niedostepny
.mp4
' in video_url: 
 129             video_url = self._download_json( 
 130                 'http
://www
.tvp
.pl
/pub
/stat
/videofileinfo?video_id
=%s' % video_id, 
 131                 video_id)['video_url
'] 
 134         video_url_base = self._search_regex( 
 135             r'(https?
://.+?
/video
)(?
:\
.(?
:ism|f4m|m3u8
)|
-\d
+\
.mp4
)', 
 136             video_url, 'video base url
', default=None) 
 138             # TODO: <Group> found instead of <AdaptationSet> in MPD manifest. 
 139             # It's 
not mentioned 
in MPEG
-DASH standard
. Figure that out
. 
 140             # formats.extend(self._extract_mpd_formats( 
 141             #     video_url_base + '.ism/video.mpd', 
 142             #     video_id, mpd_id='dash', fatal=False)) 
 143             formats
.extend(self
._extract
_ism
_formats
( 
 144                 video_url_base 
+ '.ism/Manifest', 
 145                 video_id
, 'mss', fatal
=False)) 
 146             formats
.extend(self
._extract
_f
4m
_formats
( 
 147                 video_url_base 
+ '.ism/video.f4m', 
 148                 video_id
, f4m_id
='hds', fatal
=False)) 
 149             m3u8_formats 
= self
._extract
_m
3u8_formats
( 
 150                 video_url_base 
+ '.ism/video.m3u8', video_id
, 
 151                 'mp4', 'm3u8_native', m3u8_id
='hls', fatal
=False) 
 152             self
._sort
_formats
(m3u8_formats
) 
 153             m3u8_formats 
= list(filter( 
 154                 lambda f
: f
.get('vcodec') != 'none', m3u8_formats
)) 
 155             formats
.extend(m3u8_formats
) 
 156             for i
, m3u8_format 
in enumerate(m3u8_formats
, 2): 
 157                 http_url 
= '%s-%d.mp4' % (video_url_base
, i
) 
 158                 if self
._is
_valid
_url
(http_url
, video_id
): 
 159                     f 
= m3u8_format
.copy() 
 162                         'format_id': f
['format_id'].replace('hls', 'http'), 
 168                 'format_id': 'direct', 
 170                 'ext': determine_ext(video_url
, 'mp4'), 
 173         self
._sort
_formats
(formats
) 
 178             'thumbnail': thumbnail
, 
 183 class TVPSeriesIE(InfoExtractor
): 
 184     IE_NAME 
= 'tvp:series' 
 185     _VALID_URL 
= r
'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' 
 188         'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem', 
 190             'title': 'Ogniem i mieczem', 
 195         'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat', 
 197             'title': 'Boso przez świat', 
 200         'playlist_count': 86, 
 203     def _real_extract(self
, url
): 
 204         display_id 
= self
._match
_id
(url
) 
 205         webpage 
= self
._download
_webpage
(url
, display_id
, tries
=5) 
 207         title 
= self
._html
_search
_regex
( 
 208             r
'(?s) id=[\'"]path[\'"]>(?
:.*? 
/ ){2}
(.*?
)</span
>', webpage, 'series
') 
 209         playlist_id = self._search_regex(r'nodeId
:\s
*(\d
+)', webpage, 'playlist 
id') 
 210         playlist = self._download_webpage( 
 211             'http
://vod
.tvp
.pl
/vod
/seriesAjax?
type=series
&nodeId
=%s&recommend
' 
 212             'edId
=0&sort
=&page
=0&pageSize
=10000' % playlist_id, display_id, tries=5, 
 213             note='Downloading playlist
') 
 215         videos_paths = re.findall( 
 216             '(?s
)class="shortTitle">.*?href
="(/[^"]+)', playlist) 
 218             self.url_result('http
://vod
.tvp
.pl
%s' % v_path, ie=TVPIE.ie_key()) 
 219             for v_path in videos_paths] 
 224             'display_id
': display_id,