]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nfb.py
   1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
  14 class NFBIE(InfoExtractor
): 
  16     IE_DESC 
= 'National Film Board of Canada' 
  17     _VALID_URL 
= r
'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' 
  20         'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', 
  22             'id': 'qallunaat_why_white_people_are_funny', 
  24             'title': 'Qallunaat! Why White People Are Funny ', 
  25             'description': 'md5:6b8e32dde3abf91e58857b174916620c', 
  27             'creator': 'Mark Sandiford', 
  28             'uploader': 'Mark Sandiford', 
  32             'skip_download': True, 
  36     def _real_extract(self
, url
): 
  37         video_id 
= self
._match
_id
(url
) 
  39         config 
= self
._download
_xml
( 
  40             'https://www.nfb.ca/film/%s/player_config' % video_id
, 
  41             video_id
, 'Downloading player config XML', 
  42             data
=urlencode_postdata({'getConfig': 'true'}), 
  44                 'Content-Type': 'application/x-www-form-urlencoded', 
  45                 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf' 
  48         title
, description
, thumbnail
, duration
, uploader
, author 
= [None] * 6 
  49         thumbnails
, formats 
= [[]] * 2 
  52         for media 
in config
.findall('./player/stream/media'): 
  53             if media
.get('type') == 'posterImage': 
  54                 quality_key 
= qualities(('low', 'high')) 
  56                 for asset 
in media
.findall('assets/asset'): 
  57                     asset_url 
= xpath_text(asset
, 'default/url', default
=None) 
  60                     quality 
= asset
.get('quality') 
  64                         'preference': quality_key(quality
), 
  66             elif media
.get('type') == 'video': 
  67                 title 
= xpath_text(media
, 'title', fatal
=True) 
  68                 for asset 
in media
.findall('assets/asset'): 
  69                     quality 
= asset
.get('quality') 
  70                     height 
= int_or_none(self
._search
_regex
( 
  71                         r
'^(\d+)[pP]$', quality 
or '', 'height', default
=None)) 
  73                         streamer 
= xpath_text(node
, 'streamerURI', default
=None) 
  76                         play_path 
= xpath_text(node
, 'url', default
=None) 
  81                             'app': streamer
.split('/', 3)[3], 
  82                             'play_path': play_path
, 
  85                             'format_id': '%s-%s' % (node
.tag
, quality
) if quality 
else node
.tag
, 
  88                 self
._sort
_formats
(formats
) 
  89                 description 
= clean_html(xpath_text(media
, 'description')) 
  90                 uploader 
= xpath_text(media
, 'author') 
  91                 duration 
= int_or_none(media
.get('duration')) 
  92                 for subtitle 
in media
.findall('./subtitles/subtitle'): 
  93                     subtitle_url 
= xpath_text(subtitle
, 'url', default
=None) 
  96                     lang 
= xpath_text(subtitle
, 'lang', default
='en') 
  97                     subtitles
.setdefault(lang
, []).append({ 
  99                         'ext': (subtitle
.get('format') or determine_ext(subtitle_url
)).lower(), 
 105             'description': description
, 
 106             'thumbnails': thumbnails
, 
 107             'duration': duration
, 
 109             'uploader': uploader
, 
 111             'subtitles': subtitles
,