]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/canvas.py
   1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
   4 from ..utils 
import float_or_none
 
   7 class CanvasIE(InfoExtractor
): 
   8     _VALID_URL 
= r
'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)' 
  10         'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', 
  11         'md5': 'ea838375a547ac787d4064d8c7860a6c', 
  13             'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', 
  14             'display_id': 'de-afspraak-veilt-voor-de-warmste-week', 
  16             'title': 'De afspraak veilt voor de Warmste Week', 
  17             'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6', 
  18             'thumbnail': 're:^https?://.*\.jpg$', 
  23         'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167', 
  25             'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625', 
  26             'display_id': 'pieter-0167', 
  28             'title': 'Pieter 0167', 
  29             'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e', 
  30             'thumbnail': 're:^https?://.*\.jpg$', 
  39             'skip_download': True, 
  43     def _real_extract(self
, url
): 
  44         display_id 
= self
._match
_id
(url
) 
  46         webpage 
= self
._download
_webpage
(url
, display_id
) 
  48         title 
= self
._search
_regex
( 
  49             r
'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>', 
  50             webpage
, 'title', default
=None) or self
._og
_search
_title
(webpage
) 
  52         video_id 
= self
._html
_search
_regex
( 
  53             r
'data-video=(["\'])(?P
<id>.+?
)\
1', webpage, 'video 
id', group='id') 
  55         data = self._download_json( 
  56             'https
://mediazone
.vrt
.be
/api
/v1
/canvas
/assets
/%s' % video_id, display_id) 
  59         for target in data['targetUrls
']: 
  60             format_url, format_type = target.get('url
'), target.get('type') 
  61             if not format_url or not format_type: 
  63             if format_type == 'HLS
': 
  64                 formats.extend(self._extract_m3u8_formats( 
  65                     format_url, display_id, entry_protocol='m3u8_native
', 
  66                     ext='mp4
', preference=0, fatal=False, m3u8_id=format_type)) 
  67             elif format_type == 'HDS
': 
  68                 formats.extend(self._extract_f4m_formats( 
  69                     format_url, display_id, f4m_id=format_type, fatal=False)) 
  72                     'format_id
': format_type, 
  75         self._sort_formats(formats) 
  78         subtitle_urls = data.get('subtitleUrls
') 
  79         if isinstance(subtitle_urls, list): 
  80             for subtitle in subtitle_urls: 
  81                 subtitle_url = subtitle.get('url
') 
  82                 if subtitle_url and subtitle.get('type') == 'CLOSED
': 
  83                     subtitles.setdefault('nl
', []).append({'url
': subtitle_url}) 
  87             'display_id
': display_id, 
  89             'description
': self._og_search_description(webpage), 
  91             'duration
': float_or_none(data.get('duration
'), 1000), 
  92             'thumbnail
': data.get('posterImageUrl
'), 
  93             'subtitles
': subtitles,