]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nfb.py
1 from __future__
import unicode_literals
3 from .common
import InfoExtractor
14 class NFBIE(InfoExtractor
):
16 IE_DESC
= 'National Film Board of Canada'
17 _VALID_URL
= r
'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
20 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
22 'id': 'qallunaat_why_white_people_are_funny',
24 'title': 'Qallunaat! Why White People Are Funny ',
25 'description': 'md5:6b8e32dde3abf91e58857b174916620c',
27 'creator': 'Mark Sandiford',
28 'uploader': 'Mark Sandiford',
32 'skip_download': True,
36 def _real_extract(self
, url
):
37 video_id
= self
._match
_id
(url
)
39 config
= self
._download
_xml
(
40 'https://www.nfb.ca/film/%s/player_config' % video_id
,
41 video_id
, 'Downloading player config XML',
42 data
=urlencode_postdata({'getConfig': 'true'}),
44 'Content-Type': 'application/x-www-form-urlencoded',
45 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
48 title
, description
, thumbnail
, duration
, uploader
, author
= [None] * 6
49 thumbnails
, formats
= [[]] * 2
52 for media
in config
.findall('./player/stream/media'):
53 if media
.get('type') == 'posterImage':
54 quality_key
= qualities(('low', 'high'))
56 for asset
in media
.findall('assets/asset'):
57 asset_url
= xpath_text(asset
, 'default/url', default
=None)
60 quality
= asset
.get('quality')
64 'preference': quality_key(quality
),
66 elif media
.get('type') == 'video':
67 title
= xpath_text(media
, 'title', fatal
=True)
68 for asset
in media
.findall('assets/asset'):
69 quality
= asset
.get('quality')
70 height
= int_or_none(self
._search
_regex
(
71 r
'^(\d+)[pP]$', quality
or '', 'height', default
=None))
73 streamer
= xpath_text(node
, 'streamerURI', default
=None)
76 play_path
= xpath_text(node
, 'url', default
=None)
81 'app': streamer
.split('/', 3)[3],
82 'play_path': play_path
,
85 'format_id': '%s-%s' % (node
.tag
, quality
) if quality
else node
.tag
,
88 self
._sort
_formats
(formats
)
89 description
= clean_html(xpath_text(media
, 'description'))
90 uploader
= xpath_text(media
, 'author')
91 duration
= int_or_none(media
.get('duration'))
92 for subtitle
in media
.findall('./subtitles/subtitle'):
93 subtitle_url
= xpath_text(subtitle
, 'url', default
=None)
96 lang
= xpath_text(subtitle
, 'lang', default
='en')
97 subtitles
.setdefault(lang
, []).append({
99 'ext': (subtitle
.get('format') or determine_ext(subtitle_url
)).lower(),
105 'description': description
,
106 'thumbnails': thumbnails
,
107 'duration': duration
,
109 'uploader': uploader
,
111 'subtitles': subtitles
,