]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nfb.py
5bd15f7a72f5aeb49d91391e11ddffaa1a52f44f
   1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
   4 from ..compat 
import compat_urllib_parse
 
   5 from ..utils 
import sanitized_Request
 
   8 class NFBIE(InfoExtractor
): 
  10     IE_DESC 
= 'National Film Board of Canada' 
  11     _VALID_URL 
= r
'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' 
  14         'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', 
  16             'id': 'qallunaat_why_white_people_are_funny', 
  18             'title': 'Qallunaat! Why White People Are Funny ', 
  19             'description': 'md5:836d8aff55e087d04d9f6df554d4e038', 
  21             'uploader': 'Mark Sandiford', 
  22             'uploader_id': 'mark-sandiford', 
  26             'skip_download': True, 
  30     def _real_extract(self
, url
): 
  31         video_id 
= self
._match
_id
(url
) 
  32         page 
= self
._download
_webpage
( 
  33             'https://www.nfb.ca/film/%s' % video_id
, video_id
, 
  34             'Downloading film page') 
  36         uploader_id 
= self
._html
_search
_regex
(r
'<a class="director-link" href="/explore-all-directors/([^/]+)/"', 
  37                                               page
, 'director id', fatal
=False) 
  38         uploader 
= self
._html
_search
_regex
(r
'<em class="director-name" itemprop="name">([^<]+)</em>', 
  39                                            page
, 'director name', fatal
=False) 
  41         request 
= sanitized_Request( 
  42             'https://www.nfb.ca/film/%s/player_config' % video_id
, 
  43             compat_urllib_parse
.urlencode({'getConfig': 'true'}).encode('ascii')) 
  44         request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
  45         request
.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') 
  47         config 
= self
._download
_xml
(request
, video_id
, 'Downloading player config XML') 
  55         def extract_thumbnail(media
): 
  57             for asset 
in media
.findall('assets/asset'): 
  58                 thumbnails
[asset
.get('quality')] = asset
.find('default/url').text
 
  61             if 'high' in thumbnails
: 
  62                 return thumbnails
['high'] 
  63             return list(thumbnails
.values())[0] 
  65         for media 
in config
.findall('./player/stream/media'): 
  66             if media
.get('type') == 'posterImage': 
  67                 thumbnail 
= extract_thumbnail(media
) 
  68             elif media
.get('type') == 'video': 
  69                 duration 
= int(media
.get('duration')) 
  70                 title 
= media
.find('title').text
 
  71                 description 
= media
.find('description').text
 
  72                 # It seems assets always go from lower to better quality, so no need to sort 
  73                 for asset 
in media
.findall('assets/asset'): 
  76                             'url': x
.find('streamerURI').text
, 
  77                             'app': x
.find('streamerURI').text
.split('/', 3)[3], 
  78                             'play_path': x
.find('url').text
, 
  81                             'format_id': '%s-%s' % (x
.tag
, asset
.get('quality')), 
  87             'description': description
, 
  88             'thumbnail': thumbnail
, 
  91             'uploader_id': uploader_id
,