]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/canvas.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..utils 
import float_or_none
 
   9 class CanvasIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)' 
  12         'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', 
  13         'md5': 'ea838375a547ac787d4064d8c7860a6c', 
  15             'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', 
  16             'display_id': 'de-afspraak-veilt-voor-de-warmste-week', 
  18             'title': 'De afspraak veilt voor de Warmste Week', 
  19             'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6', 
  20             'thumbnail': 're:^https?://.*\.jpg$', 
  25         'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167', 
  27             'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625', 
  28             'display_id': 'pieter-0167', 
  30             'title': 'Pieter 0167', 
  31             'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e', 
  32             'thumbnail': 're:^https?://.*\.jpg$', 
  41             'skip_download': True, 
  44         'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles', 
  46             'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f', 
  47             'display_id': 'herbekijk-sorry-voor-alles', 
  49             'title': 'Herbekijk Sorry voor alles', 
  50             'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3', 
  51             'thumbnail': 're:^https?://.*\.jpg$', 
  55             'skip_download': True, 
  58         'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', 
  59         'only_matching': True, 
  62     def _real_extract(self
, url
): 
  63         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  64         site_id
, display_id 
= mobj
.group('site_id'), mobj
.group('id') 
  66         webpage 
= self
._download
_webpage
(url
, display_id
) 
  68         title 
= (self
._search
_regex
( 
  69             r
'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>', 
  70             webpage
, 'title', default
=None) or self
._og
_search
_title
( 
  73         video_id 
= self
._html
_search
_regex
( 
  74             r
'data-video=(["\'])(?P
<id>(?
:(?
!\
1).)+)\
1', webpage, 'video 
id', group='id') 
  76         data = self._download_json( 
  77             'https
://mediazone
.vrt
.be
/api
/v1
/%s/assets
/%s' 
  78             % (site_id, video_id), display_id) 
  81         for target in data['targetUrls
']: 
  82             format_url, format_type = target.get('url
'), target.get('type') 
  83             if not format_url or not format_type: 
  85             if format_type == 'HLS
': 
  86                 formats.extend(self._extract_m3u8_formats( 
  87                     format_url, display_id, entry_protocol='m3u8_native
', 
  88                     ext='mp4
', preference=0, fatal=False, m3u8_id=format_type)) 
  89             elif format_type == 'HDS
': 
  90                 formats.extend(self._extract_f4m_formats( 
  91                     format_url, display_id, f4m_id=format_type, fatal=False)) 
  94                     'format_id
': format_type, 
  97         self._sort_formats(formats) 
 100         subtitle_urls = data.get('subtitleUrls
') 
 101         if isinstance(subtitle_urls, list): 
 102             for subtitle in subtitle_urls: 
 103                 subtitle_url = subtitle.get('url
') 
 104                 if subtitle_url and subtitle.get('type') == 'CLOSED
': 
 105                     subtitles.setdefault('nl
', []).append({'url
': subtitle_url}) 
 109             'display_id
': display_id, 
 111             'description
': self._og_search_description(webpage), 
 113             'duration
': float_or_none(data.get('duration
'), 1000), 
 114             'thumbnail
': data.get('posterImageUrl
'), 
 115             'subtitles
': subtitles,