X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/453698570f26bebd37b39df8537d993b57d77b8b..67afe88251ff8b1fcb43246c9a4bb1a0ea0185a2:/youtube_dl/extractor/nhl.py?ds=inline diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index d3a4fc5..279b183 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -20,6 +20,15 @@ class NHLBaseInfoExtractor(InfoExtractor): def _fix_json(json_string): return json_string.replace('\\\'', '\'') + def _real_extract_video(self, video_id): + vid_parts = video_id.split(',') + if len(vid_parts) == 3: + video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0')) + json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id + data = self._download_json( + json_url, video_id, transform_source=self._fix_json) + return self._extract_video(data[0]) + def _extract_video(self, info): video_id = info['id'] self.report_extraction(video_id) @@ -41,7 +50,7 @@ class NHLBaseInfoExtractor(InfoExtractor): video_url = initial_video_url join = compat_urlparse.urljoin - return { + ret = { 'id': video_id, 'title': info['name'], 'url': video_url, @@ -50,11 +59,20 @@ class NHLBaseInfoExtractor(InfoExtractor): 'thumbnail': join(join(video_url, '/u/'), info['bigImage']), 'upload_date': unified_strdate(info['releaseDate'].split('.')[0]), } + if video_url.startswith('rtmp:'): + mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url) + ret.update({ + 'tc_url': mobj.group('tc_url'), + 'play_path': mobj.group('play_path'), + 'app': mobj.group('app'), + 'no_resume': True, + }) + return ret class NHLIE(NHLBaseInfoExtractor): IE_NAME = 'nhl.com' - _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)' + _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)' _TESTS = [{ 'url': 
'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', @@ -92,15 +110,64 @@ class NHLIE(NHLBaseInfoExtractor): }, { 'url': 'http://video.flames.nhl.com/videocenter/console?id=630616', 'only_matching': True, + }, { + 'url': 'http://video.nhl.com/videocenter/?id=736722', + 'only_matching': True, + }, { + 'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en', + 'md5': '076fcb88c255154aacbf0a7accc3f340', + 'info_dict': { + 'id': '2014020299-X-h', + 'ext': 'mp4', + 'title': 'Penguins at Islanders / Game Highlights', + 'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014', + 'duration': 268, + 'upload_date': '20141122', + } + }, { + 'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4', + 'info_dict': { + 'id': '691469', + 'ext': 'mp4', + 'title': 'RAW | Craig MacTavish Full Press Conference', + 'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.', + 'upload_date': '20141205', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump + } }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id - data = self._download_json( - json_url, video_id, transform_source=self._fix_json) - return self._extract_video(data[0]) + video_id = self._match_id(url) + return self._real_extract_video(video_id) + + +class NHLNewsIE(NHLBaseInfoExtractor): + IE_NAME = 'nhl.com:news' + IE_DESC = 'NHL news' + _VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)' + + _TEST = { + 'url': 'http://www.nhl.com/ice/news.htm?id=750727', + 'md5': '4b3d1262e177687a3009937bd9ec0be8', + 'info_dict': { + 'id': '736722', + 'ext': 'mp4', + 'title': 'Cal Clutterbuck has been fined $2,000', + 'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6', + 'duration': 37, + 'upload_date': 
'20150128', + }, + } + + def _real_extract(self, url): + news_id = self._match_id(url) + webpage = self._download_webpage(url, news_id) + video_id = self._search_regex( + [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"], + webpage, 'video id') + return self._real_extract_video(video_id) class NHLVideocenterIE(NHLBaseInfoExtractor):