X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/0cf0312991a54458a07e903da2e47e9f3c8855ae..76d85602f8a22ca3817c3a86f4f0e8969c0b02a9:/youtube_dl/extractor/nhl.py?ds=sidebyside diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index bdcf7e2..e98a5ef 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -2,11 +2,13 @@ from __future__ import unicode_literals import re import json +import os from .common import InfoExtractor from ..compat import ( compat_urlparse, compat_urllib_parse, + compat_urllib_parse_urlparse ) from ..utils import ( unified_strdate, @@ -18,15 +20,27 @@ class NHLBaseInfoExtractor(InfoExtractor): def _fix_json(json_string): return json_string.replace('\\\'', '\'') + def _real_extract_video(self, video_id): + vid_parts = video_id.split(',') + if len(vid_parts) == 3: + video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0')) + json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id + data = self._download_json( + json_url, video_id, transform_source=self._fix_json) + return self._extract_video(data[0]) + def _extract_video(self, info): video_id = info['id'] self.report_extraction(video_id) initial_video_url = info['publishPoint'] if info['formats'] == '1': + parsed_url = compat_urllib_parse_urlparse(initial_video_url) + filename, ext = os.path.splitext(parsed_url.path) + path = '%s_sd%s' % (filename, ext) data = compat_urllib_parse.urlencode({ 'type': 'fvod', - 'path': initial_video_url.replace('.mp4', '_sd.mp4'), + 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:]) }) path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data path_doc = self._download_xml( @@ -36,7 +50,7 @@ class NHLBaseInfoExtractor(InfoExtractor): video_url = initial_video_url join = compat_urlparse.urljoin - return { + ret = { 'id': video_id, 'title': info['name'], 'url': video_url, @@ -45,11 +59,20 @@ class NHLBaseInfoExtractor(InfoExtractor): 'thumbnail': join(join(video_url, '/u/'), info['bigImage']), 'upload_date': unified_strdate(info['releaseDate'].split('.')[0]), } + if video_url.startswith('rtmp:'): + mobj = re.match(r'(?Prtmp://[^/]+/(?P[a-z0-9/]+))/(?Pmp4:.*)', video_url) + ret.update({ + 'tc_url': mobj.group('tc_url'), + 'play_path': mobj.group('play_path'), + 'app': mobj.group('app'), + 'no_resume': True, + }) + return ret class NHLIE(NHLBaseInfoExtractor): IE_NAME = 'nhl.com' - _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P[0-9a-z-]+)' + _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P[-0-9a-zA-Z,]+)' _TESTS = [{ 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', @@ -73,24 +96,100 @@ class NHLIE(NHLBaseInfoExtractor): 'duration': 0, 'upload_date': '20141011', }, + }, { + 'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802', + 'md5': 'c78fc64ea01777e426cfc202b746c825', + 'info_dict': { + 'id': '58665', + 'ext': 'flv', + 'title': 'Classic Game In Six - April 22, 1979', + 'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.', + 'duration': 400, + 'upload_date': '20100129' + }, }, { 'url': 'http://video.flames.nhl.com/videocenter/console?id=630616', 'only_matching': True, + }, { + 'url': 'http://video.nhl.com/videocenter/?id=736722', + 'only_matching': True, + }, { + 'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en', + 'md5': '076fcb88c255154aacbf0a7accc3f340', + 'info_dict': { + 'id': '2014020299-X-h', + 'ext': 'mp4', + 'title': 'Penguins at Islanders / Game Highlights', + 'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014', + 'duration': 268, + 'upload_date': '20141122', + } + }, { + 'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4', + 'info_dict': { + 'id': '691469', + 'ext': 'mp4', + 'title': 'RAW | Craig MacTavish Full Press Conference', + 'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.', + 'upload_date': '20141205', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump + } + }, { + 'url': 'http://video.nhl.com/videocenter/embed?playlist=836127', + 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id - data = self._download_json( - json_url, video_id, transform_source=self._fix_json) - return self._extract_video(data[0]) + video_id = self._match_id(url) + return self._real_extract_video(video_id) + + +class NHLNewsIE(NHLBaseInfoExtractor): + IE_NAME = 'nhl.com:news' + IE_DESC = 'NHL news' + _VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P[-0-9a-zA-Z]+)' + + _TESTS = [{ + 'url': 'http://www.nhl.com/ice/news.htm?id=750727', + 'md5': '4b3d1262e177687a3009937bd9ec0be8', + 'info_dict': { + 'id': '736722', + 'ext': 'mp4', + 'title': 'Cal Clutterbuck has been fined $2,000', + 'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6', + 'duration': 37, + 'upload_date': '20150128', + }, + }, { + # iframe embed + 'url': 'http://sabres.nhl.com/club/news.htm?id=780189', + 'md5': '9f663d1c006c90ac9fb82777d4294e12', + 'info_dict': { + 'id': '836127', + 'ext': 'mp4', + 'title': 'Morning Skate: OTT vs. BUF (9/23/15)', + 'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.", + 'duration': 93, + 'upload_date': '20150923', + }, + }] + + def _real_extract(self, url): + news_id = self._match_id(url) + webpage = self._download_webpage(url, news_id) + video_id = self._search_regex( + [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'", + r']+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'], + webpage, 'video id') + return self._real_extract_video(video_id) class NHLVideocenterIE(NHLBaseInfoExtractor): IE_NAME = 'nhl.com:videocenter' IE_DESC = 'NHL videocenter category' - _VALID_URL = r'https?://video\.(?P[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P[0-9]+)(?![&?]id=).*?)?$' + _VALID_URL = r'https?://video\.(?P[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P[0-9]+)(?![&?]id=).*?)?$' _TEST = { 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999', 'info_dict': {