X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9dc487f48b50767cf540fa36c3de2c386fd74c04..ced7488f6d3a519b2c1b1cbd31048743fb8285bd:/youtube_dl/extractor/ctvnews.py?ds=sidebyside diff --git a/youtube_dl/extractor/ctvnews.py b/youtube_dl/extractor/ctvnews.py new file mode 100644 index 0000000..1023b61 --- /dev/null +++ b/youtube_dl/extractor/ctvnews.py @@ -0,0 +1,65 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import orderedSet + + +class CTVNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P[0-9.]+)' + _TESTS = [{ + 'url': 'http://www.ctvnews.ca/video?clipId=901995', + 'md5': '10deb320dc0ccb8d01d34d12fc2ea672', + 'info_dict': { + 'id': '901995', + 'ext': 'mp4', + 'title': 'Extended: \'That person cannot be me\' Johnson says', + 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', + 'timestamp': 1467286284, + 'upload_date': '20160630', + } + }, { + 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', + 'info_dict': + { + 'id': '1.2966224', + }, + 'playlist_mincount': 19, + }, { + 'url': 'http://www.ctvnews.ca/video?binId=1.2876780', + 'info_dict': + { + 'id': '1.2876780', + }, + 'playlist_mincount': 100, + }, { + 'url': 'http://www.ctvnews.ca/1.810401', + 'only_matching': True, + }, { + 'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231', + 'only_matching': True, + }] + + def _real_extract(self, url): + page_id = self._match_id(url) + + def ninecninemedia_url_result(clip_id): + return { + '_type': 'url_transparent', + 'id': clip_id, + 'url': '9c9media:ctvnews_web:%s' % clip_id, + 'ie_key': 'NineCNineMedia', + } + + if page_id.isdigit(): + return ninecninemedia_url_result(page_id) + else: + webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={ + 'ot': 'example.AjaxPageLayout.ot', + 'maxItemsPerPage': 1000000, + }) + entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( + re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] + return self.playlist_result(entries, page_id)