X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/af478477605bdf3f5d57562035885cfee905f379..382a868cee069f08aacf0b89c9d689ec420d6b2c:/youtube_dl/extractor/nfb.py diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index e88566c..ea07725 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -1,9 +1,7 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_parse, ) @@ -12,7 +10,7 @@ from ..utils import ( class NFBIE(InfoExtractor): IE_NAME = 'nfb' IE_DESC = 'National Film Board of Canada' - _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' _TEST = { 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', @@ -32,18 +30,18 @@ class NFBIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') + video_id = self._match_id(url) + page = self._download_webpage( + 'https://www.nfb.ca/film/%s' % video_id, video_id, + 'Downloading film page') uploader_id = self._html_search_regex(r'([^<]+)', - page, 'director name', fatal=False) + page, 'director name', fatal=False) request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id, - compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) + compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') @@ -73,14 +71,16 @@ class NFBIE(InfoExtractor): title = media.find('title').text description = media.find('description').text # It seems assets always go from lower to better 
quality, so no need to sort - formats = [{ - 'url': x.find('default/streamerURI').text, - 'app': x.find('default/streamerURI').text.split('/', 3)[3], - 'play_path': x.find('default/url').text, - 'rtmp_live': False, - 'ext': 'mp4', - 'format_id': x.get('quality'), - } for x in media.findall('assets/asset')] + for asset in media.findall('assets/asset'): + for x in asset: + formats.append({ + 'url': x.find('streamerURI').text, + 'app': x.find('streamerURI').text.split('/', 3)[3], + 'play_path': x.find('url').text, + 'rtmp_live': False, + 'ext': 'mp4', + 'format_id': '%s-%s' % (x.tag, asset.get('quality')), + }) return { 'id': video_id, @@ -91,4 +91,4 @@ class NFBIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader_id, 'formats': formats, - } \ No newline at end of file + }