]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvp.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  10     get_element_by_attribute
, 
  15 class TVPIE(InfoExtractor
): 
  17     IE_DESC 
= 'Telewizja Polska' 
  18     _VALID_URL 
= r
'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)' 
  21         'url': 'http://vod.tvp.pl/194536/i-seria-odc-13', 
  22         'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 
  26             'title': 'Czas honoru, I seria – odc. 13', 
  27             'description': 'md5:76649d2014f65c99477be17f23a4dead', 
  30         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', 
  31         'md5': 'b0005b542e5b4de643a9690326ab1257', 
  35             'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', 
  36             'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', 
  39         # page id is not the same as video id(#7799) 
  40         'url': 'http://vod.tvp.pl/22704887/08122015-1500', 
  41         'md5': 'cf6a4705dfd1489aef8deb168d6ba742', 
  45             'title': 'Wiadomości, 08.12.2015, 15:00', 
  48         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 
  49         'only_matching': True, 
  51         'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200', 
  52         'only_matching': True, 
  54         'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa', 
  55         'only_matching': True, 
  57         'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach', 
  58         'only_matching': True, 
  60         'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum', 
  61         'only_matching': True, 
  63         'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji', 
  64         'only_matching': True, 
  67     def _real_extract(self
, url
): 
  68         page_id 
= self
._match
_id
(url
) 
  69         webpage 
= self
._download
_webpage
(url
, page_id
) 
  70         video_id 
= self
._search
_regex
([ 
  71             r
'<iframe[^>]+src="[^"]*?object_id=(\d+)', 
  72             r
"object_id\s*:\s*'(\d+)'", 
  73             r
'data-video-id="(\d+)"'], webpage
, 'video id', default
=page_id
) 
  75             '_type': 'url_transparent', 
  76             'url': 'tvp:' + video_id
, 
  77             'description': self
._og
_search
_description
(webpage
, default
=None), 
  78             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
  83 class TVPEmbedIE(InfoExtractor
): 
  85     IE_DESC 
= 'Telewizja Polska' 
  86     _VALID_URL 
= r
'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)' 
  89         'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 
  90         'md5': '8c9cd59d16edabf39331f93bf8a766c7', 
  94             'title': 'Panorama, 07.12.2015, 15:40', 
  97         'url': 'tvp:22670268', 
  98         'only_matching': True, 
 101     def _real_extract(self
, url
): 
 102         video_id 
= self
._match
_id
(url
) 
 104         webpage 
= self
._download
_webpage
( 
 105             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id
, video_id
) 
 107         error_massage 
= get_element_by_attribute('class', 'msg error', webpage
) 
 109             raise ExtractorError('%s said: %s' % ( 
 110                 self
.IE_NAME
, clean_html(error_massage
)), expected
=True) 
 112         title 
= self
._search
_regex
( 
 113             r
'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', 
 114             webpage, 'title', group='title') 
 115         series_title = self._search_regex( 
 116             r'name\s*:\s*([\'"])SeriesTitle\
1\s
*,\s
*value\s
*:\s
*\
1(?P
<series
>.+?
)\
1', 
 117             webpage, 'series
', group='series
', default=None) 
 119             title = '%s, %s' % (series_title, title) 
 121         thumbnail = self._search_regex( 
 122             r"poster\s*:\s*'([^
']+)'", webpage, 'thumbnail', default=None) 
 124         video_url = self._search_regex( 
 125             r'0:{src:([\'"])(?P
<url
>.*?
)\
1', webpage, 
 126             'formats
', group='url
', default=None) 
 127         if not video_url or 'material_niedostepny
.mp4
' in video_url: 
 128             video_url = self._download_json( 
 129                 'http
://www
.tvp
.pl
/pub
/stat
/videofileinfo?video_id
=%s' % video_id, 
 130                 video_id)['video_url
'] 
 133         video_url_base = self._search_regex( 
 134             r'(https?
://.+?
/video
)(?
:\
.(?
:ism|f4m|m3u8
)|
-\d
+\
.mp4
)', 
 135             video_url, 'video base url
', default=None) 
 137             # TODO: <Group> found instead of <AdaptationSet> in MPD manifest. 
 138             # It's 
not mentioned 
in MPEG
-DASH standard
. Figure that out
. 
 139             # formats.extend(self._extract_mpd_formats( 
 140             #     video_url_base + '.ism/video.mpd', 
 141             #     video_id, mpd_id='dash', fatal=False)) 
 142             formats
.extend(self
._extract
_ism
_formats
( 
 143                 video_url_base 
+ '.ism/Manifest', 
 144                 video_id
, 'mss', fatal
=False)) 
 145             formats
.extend(self
._extract
_f
4m
_formats
( 
 146                 video_url_base 
+ '.ism/video.f4m', 
 147                 video_id
, f4m_id
='hds', fatal
=False)) 
 148             m3u8_formats 
= self
._extract
_m
3u8_formats
( 
 149                 video_url_base 
+ '.ism/video.m3u8', video_id
, 
 150                 'mp4', 'm3u8_native', m3u8_id
='hls', fatal
=False) 
 151             self
._sort
_formats
(m3u8_formats
) 
 152             m3u8_formats 
= list(filter( 
 153                 lambda f
: f
.get('vcodec') != 'none', m3u8_formats
)) 
 154             formats
.extend(m3u8_formats
) 
 155             for i
, m3u8_format 
in enumerate(m3u8_formats
, 2): 
 156                 http_url 
= '%s-%d.mp4' % (video_url_base
, i
) 
 157                 if self
._is
_valid
_url
(http_url
, video_id
): 
 158                     f 
= m3u8_format
.copy() 
 161                         'format_id': f
['format_id'].replace('hls', 'http'), 
 167                 'format_id': 'direct', 
 169                 'ext': determine_ext(video_url
, 'mp4'), 
 172         self
._sort
_formats
(formats
) 
 177             'thumbnail': thumbnail
, 
 182 class TVPSeriesIE(InfoExtractor
): 
 183     IE_NAME 
= 'tvp:series' 
 184     _VALID_URL 
= r
'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' 
 187         'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem', 
 189             'title': 'Ogniem i mieczem', 
 194         'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat', 
 196             'title': 'Boso przez świat', 
 199         'playlist_count': 86, 
 202     def _real_extract(self
, url
): 
 203         display_id 
= self
._match
_id
(url
) 
 204         webpage 
= self
._download
_webpage
(url
, display_id
, tries
=5) 
 206         title 
= self
._html
_search
_regex
( 
 207             r
'(?s) id=[\'"]path[\'"]>(?
:.*? 
/ ){2}
(.*?
)</span
>', webpage, 'series
') 
 208         playlist_id = self._search_regex(r'nodeId
:\s
*(\d
+)', webpage, 'playlist 
id') 
 209         playlist = self._download_webpage( 
 210             'http
://vod
.tvp
.pl
/vod
/seriesAjax?
type=series
&nodeId
=%s&recommend
' 
 211             'edId
=0&sort
=&page
=0&pageSize
=10000' % playlist_id, display_id, tries=5, 
 212             note='Downloading playlist
') 
 214         videos_paths = re.findall( 
 215             '(?s
)class="shortTitle">.*?href
="(/[^"]+)', playlist) 
 217             self.url_result('http
://vod
.tvp
.pl
%s' % v_path, ie=TVPIE.ie_key()) 
 218             for v_path in videos_paths] 
 223             'display_id
': display_id,