]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nfb.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  12 class NFBIE(InfoExtractor
): 
  14     IE_DESC 
= 'National Film Board of Canada' 
  15     _VALID_URL 
= r
'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' 
  18         'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', 
  20             'id': 'qallunaat_why_white_people_are_funny', 
  22             'title': 'Qallunaat! Why White People Are Funny ', 
  23             'description': 'md5:836d8aff55e087d04d9f6df554d4e038', 
  25             'uploader': 'Mark Sandiford', 
  26             'uploader_id': 'mark-sandiford', 
  30             'skip_download': True, 
  34     def _real_extract(self
, url
): 
  35         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  36         video_id 
= mobj
.group('id') 
  38         page 
= self
._download
_webpage
('https://www.nfb.ca/film/%s' % video_id
, video_id
, 'Downloading film page') 
  40         uploader_id 
= self
._html
_search
_regex
(r
'<a class="director-link" href="/explore-all-directors/([^/]+)/"', 
  41             page
, 'director id', fatal
=False) 
  42         uploader 
= self
._html
_search
_regex
(r
'<em class="director-name" itemprop="name">([^<]+)</em>', 
  43             page
, 'director name', fatal
=False) 
  45         request 
= compat_urllib_request
.Request('https://www.nfb.ca/film/%s/player_config' % video_id
, 
  46             compat_urllib_parse
.urlencode({'getConfig': 'true'}).encode('ascii')) 
  47         request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
  48         request
.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') 
  50         config 
= self
._download
_xml
(request
, video_id
, 'Downloading player config XML') 
  58         def extract_thumbnail(media
): 
  60             for asset 
in media
.findall('assets/asset'): 
  61                 thumbnails
[asset
.get('quality')] = asset
.find('default/url').text
 
  64             if 'high' in thumbnails
: 
  65                 return thumbnails
['high'] 
  66             return list(thumbnails
.values())[0] 
  68         for media 
in config
.findall('./player/stream/media'): 
  69             if media
.get('type') == 'posterImage': 
  70                 thumbnail 
= extract_thumbnail(media
) 
  71             elif media
.get('type') == 'video': 
  72                 duration 
= int(media
.get('duration')) 
  73                 title 
= media
.find('title').text
 
  74                 description 
= media
.find('description').text
 
  75                 # It seems assets always go from lower to better quality, so no need to sort 
  76                 for asset 
in media
.findall('assets/asset'): 
  79                             'url': x
.find('streamerURI').text
, 
  80                             'app': x
.find('streamerURI').text
.split('/', 3)[3], 
  81                             'play_path': x
.find('url').text
, 
  84                             'format_id': '%s-%s' % (x
.tag
, asset
.get('quality')), 
  90             'description': description
, 
  91             'thumbnail': thumbnail
, 
  94             'uploader_id': uploader_id
,