X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/af478477605bdf3f5d57562035885cfee905f379..14111bdbc4a22585fc51fd4efa3572ea003b5321:/youtube_dl/extractor/nfb.py?ds=sidebyside diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index e88566c..5bd15f7 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -1,18 +1,14 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - compat_urllib_request, - compat_urllib_parse, -) +from ..compat import compat_urllib_parse +from ..utils import sanitized_Request class NFBIE(InfoExtractor): IE_NAME = 'nfb' IE_DESC = 'National Film Board of Canada' - _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' _TEST = { 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', @@ -32,17 +28,18 @@ class NFBIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') + video_id = self._match_id(url) + page = self._download_webpage( + 'https://www.nfb.ca/film/%s' % video_id, video_id, + 'Downloading film page') uploader_id = self._html_search_regex(r'([^<]+)', - page, 'director name', fatal=False) + page, 'director name', fatal=False) - request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id, + request = sanitized_Request( + 'https://www.nfb.ca/film/%s/player_config' % video_id, compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') @@ -73,14 +70,16 @@ class NFBIE(InfoExtractor): title = media.find('title').text description = media.find('description').text # 
It seems assets always go from lower to better quality, so no need to sort - formats = [{ - 'url': x.find('default/streamerURI').text, - 'app': x.find('default/streamerURI').text.split('/', 3)[3], - 'play_path': x.find('default/url').text, - 'rtmp_live': False, - 'ext': 'mp4', - 'format_id': x.get('quality'), - } for x in media.findall('assets/asset')] + for asset in media.findall('assets/asset'): + for x in asset: + formats.append({ + 'url': x.find('streamerURI').text, + 'app': x.find('streamerURI').text.split('/', 3)[3], + 'play_path': x.find('url').text, + 'rtmp_live': False, + 'ext': 'mp4', + 'format_id': '%s-%s' % (x.tag, asset.get('quality')), + }) return { 'id': video_id, @@ -91,4 +90,4 @@ class NFBIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader_id, 'formats': formats, - } \ No newline at end of file + }